1/*
2 * ring buffer based function tracer
3 *
4 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6 *
7 * Originally taken from the RT patch by:
8 *    Arnaldo Carvalho de Melo <acme@redhat.com>
9 *
10 * Based on code from the latency_tracer, that is:
11 *  Copyright (C) 2004-2006 Ingo Molnar
12 *  Copyright (C) 2004 Nadia Yvette Chambers
13 */
14#include <linux/ring_buffer.h>
15#include <generated/utsrelease.h>
16#include <linux/stacktrace.h>
17#include <linux/writeback.h>
18#include <linux/kallsyms.h>
19#include <linux/seq_file.h>
20#include <linux/notifier.h>
21#include <linux/irqflags.h>
22#include <linux/debugfs.h>
23#include <linux/tracefs.h>
24#include <linux/pagemap.h>
25#include <linux/hardirq.h>
26#include <linux/linkage.h>
27#include <linux/uaccess.h>
28#include <linux/kprobes.h>
29#include <linux/ftrace.h>
30#include <linux/module.h>
31#include <linux/percpu.h>
32#include <linux/splice.h>
33#include <linux/kdebug.h>
34#include <linux/string.h>
35#include <linux/mount.h>
36#include <linux/rwsem.h>
37#include <linux/slab.h>
38#include <linux/ctype.h>
39#include <linux/init.h>
40#include <linux/poll.h>
41#include <linux/nmi.h>
42#include <linux/fs.h>
43#include <linux/sched/rt.h>
44
45#include "trace.h"
46#include "trace_output.h"
47
48/*
49 * On boot up, the ring buffer is set to the minimum size, so that
50 * we do not waste memory on systems that are not using tracing.
51 */
52bool ring_buffer_expanded;
53
54/*
55 * We need to change this state when a selftest is running.
56 * A selftest will lurk into the ring-buffer to count the
57 * entries inserted during the selftest although some concurrent
58 * insertions into the ring-buffer such as trace_printk could occurred
59 * at the same time, giving false positive or negative results.
60 */
61static bool __read_mostly tracing_selftest_running;
62
63/*
64 * If a tracer is running, we do not want to run SELFTEST.
65 */
66bool __read_mostly tracing_selftest_disabled;
67
68/* Pipe tracepoints to printk */
69struct trace_iterator *tracepoint_print_iter;
70int tracepoint_printk;
71
72/* For tracers that don't implement custom flags */
73static struct tracer_opt dummy_tracer_opt[] = {
74	{ }
75};
76
77static struct tracer_flags dummy_tracer_flags = {
78	.val = 0,
79	.opts = dummy_tracer_opt
80};
81
82static int
83dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
84{
85	return 0;
86}
87
88/*
89 * To prevent the comm cache from being overwritten when no
90 * tracing is active, only save the comm when a trace event
91 * occurred.
92 */
93static DEFINE_PER_CPU(bool, trace_cmdline_save);
94
95/*
96 * Kill all tracing for good (never come back).
97 * It is initialized to 1 but will turn to zero if the initialization
98 * of the tracer is successful. But that is the only place that sets
99 * this back to zero.
100 */
101static int tracing_disabled = 1;
102
103cpumask_var_t __read_mostly	tracing_buffer_mask;
104
105/*
106 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
107 *
108 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
109 * is set, then ftrace_dump is called. This will output the contents
110 * of the ftrace buffers to the console.  This is very useful for
111 * capturing traces that lead to crashes and outputing it to a
112 * serial console.
113 *
114 * It is default off, but you can enable it with either specifying
115 * "ftrace_dump_on_oops" in the kernel command line, or setting
116 * /proc/sys/kernel/ftrace_dump_on_oops
117 * Set 1 if you want to dump buffers of all CPUs
118 * Set 2 if you want to dump the buffer of the CPU that triggered oops
119 */
120
121enum ftrace_dump_mode ftrace_dump_on_oops;
122
123/* When set, tracing will stop when a WARN*() is hit */
124int __disable_trace_on_warning;
125
126#ifdef CONFIG_TRACE_ENUM_MAP_FILE
127/* Map of enums to their values, for "enum_map" file */
128struct trace_enum_map_head {
129	struct module			*mod;
130	unsigned long			length;
131};
132
133union trace_enum_map_item;
134
135struct trace_enum_map_tail {
136	/*
137	 * "end" is first and points to NULL as it must be different
138	 * than "mod" or "enum_string"
139	 */
140	union trace_enum_map_item	*next;
141	const char			*end;	/* points to NULL */
142};
143
144static DEFINE_MUTEX(trace_enum_mutex);
145
146/*
147 * The trace_enum_maps are saved in an array with two extra elements,
148 * one at the beginning, and one at the end. The beginning item contains
149 * the count of the saved maps (head.length), and the module they
150 * belong to if not built in (head.mod). The ending item contains a
151 * pointer to the next array of saved enum_map items.
152 */
153union trace_enum_map_item {
154	struct trace_enum_map		map;
155	struct trace_enum_map_head	head;
156	struct trace_enum_map_tail	tail;
157};
158
159static union trace_enum_map_item *trace_enum_maps;
160#endif /* CONFIG_TRACE_ENUM_MAP_FILE */
161
162static int tracing_set_tracer(struct trace_array *tr, const char *buf);
163
164#define MAX_TRACER_SIZE		100
165static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
166static char *default_bootup_tracer;
167
168static bool allocate_snapshot;
169
170static int __init set_cmdline_ftrace(char *str)
171{
172	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
173	default_bootup_tracer = bootup_tracer_buf;
174	/* We are using ftrace early, expand it */
175	ring_buffer_expanded = true;
176	return 1;
177}
178__setup("ftrace=", set_cmdline_ftrace);
179
180static int __init set_ftrace_dump_on_oops(char *str)
181{
182	if (*str++ != '=' || !*str) {
183		ftrace_dump_on_oops = DUMP_ALL;
184		return 1;
185	}
186
187	if (!strcmp("orig_cpu", str)) {
188		ftrace_dump_on_oops = DUMP_ORIG;
189                return 1;
190        }
191
192        return 0;
193}
194__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
195
196static int __init stop_trace_on_warning(char *str)
197{
198	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
199		__disable_trace_on_warning = 1;
200	return 1;
201}
202__setup("traceoff_on_warning", stop_trace_on_warning);
203
204static int __init boot_alloc_snapshot(char *str)
205{
206	allocate_snapshot = true;
207	/* We also need the main ring buffer expanded */
208	ring_buffer_expanded = true;
209	return 1;
210}
211__setup("alloc_snapshot", boot_alloc_snapshot);
212
213
214static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
215
216static int __init set_trace_boot_options(char *str)
217{
218	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
219	return 0;
220}
221__setup("trace_options=", set_trace_boot_options);
222
223static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
224static char *trace_boot_clock __initdata;
225
226static int __init set_trace_boot_clock(char *str)
227{
228	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
229	trace_boot_clock = trace_boot_clock_buf;
230	return 0;
231}
232__setup("trace_clock=", set_trace_boot_clock);
233
234static int __init set_tracepoint_printk(char *str)
235{
236	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
237		tracepoint_printk = 1;
238	return 1;
239}
240__setup("tp_printk", set_tracepoint_printk);
241
242unsigned long long ns2usecs(cycle_t nsec)
243{
244	nsec += 500;
245	do_div(nsec, 1000);
246	return nsec;
247}
248
249/* trace_flags holds trace_options default values */
250#define TRACE_DEFAULT_FLAGS						\
251	(FUNCTION_DEFAULT_FLAGS |					\
252	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
253	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
254	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
255	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
256
257/* trace_options that are only supported by global_trace */
258#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
259	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
260
261
262/*
263 * The global_trace is the descriptor that holds the tracing
264 * buffers for the live tracing. For each CPU, it contains
265 * a link list of pages that will store trace entries. The
266 * page descriptor of the pages in the memory is used to hold
267 * the link list by linking the lru item in the page descriptor
268 * to each of the pages in the buffer per CPU.
269 *
270 * For each active CPU there is a data field that holds the
271 * pages for the buffer for that CPU. Each CPU has the same number
272 * of pages allocated for its buffer.
273 */
274static struct trace_array global_trace = {
275	.trace_flags = TRACE_DEFAULT_FLAGS,
276};
277
278LIST_HEAD(ftrace_trace_arrays);
279
280int trace_array_get(struct trace_array *this_tr)
281{
282	struct trace_array *tr;
283	int ret = -ENODEV;
284
285	mutex_lock(&trace_types_lock);
286	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287		if (tr == this_tr) {
288			tr->ref++;
289			ret = 0;
290			break;
291		}
292	}
293	mutex_unlock(&trace_types_lock);
294
295	return ret;
296}
297
298static void __trace_array_put(struct trace_array *this_tr)
299{
300	WARN_ON(!this_tr->ref);
301	this_tr->ref--;
302}
303
304void trace_array_put(struct trace_array *this_tr)
305{
306	mutex_lock(&trace_types_lock);
307	__trace_array_put(this_tr);
308	mutex_unlock(&trace_types_lock);
309}
310
311int filter_check_discard(struct trace_event_file *file, void *rec,
312			 struct ring_buffer *buffer,
313			 struct ring_buffer_event *event)
314{
315	if (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
316	    !filter_match_preds(file->filter, rec)) {
317		ring_buffer_discard_commit(buffer, event);
318		return 1;
319	}
320
321	return 0;
322}
323EXPORT_SYMBOL_GPL(filter_check_discard);
324
325int call_filter_check_discard(struct trace_event_call *call, void *rec,
326			      struct ring_buffer *buffer,
327			      struct ring_buffer_event *event)
328{
329	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
330	    !filter_match_preds(call->filter, rec)) {
331		ring_buffer_discard_commit(buffer, event);
332		return 1;
333	}
334
335	return 0;
336}
337EXPORT_SYMBOL_GPL(call_filter_check_discard);
338
339static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
340{
341	u64 ts;
342
343	/* Early boot up does not have a buffer yet */
344	if (!buf->buffer)
345		return trace_clock_local();
346
347	ts = ring_buffer_time_stamp(buf->buffer, cpu);
348	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
349
350	return ts;
351}
352
353cycle_t ftrace_now(int cpu)
354{
355	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
356}
357
358/**
359 * tracing_is_enabled - Show if global_trace has been disabled
360 *
361 * Shows if the global trace has been enabled or not. It uses the
362 * mirror flag "buffer_disabled" to be used in fast paths such as for
363 * the irqsoff tracer. But it may be inaccurate due to races. If you
364 * need to know the accurate state, use tracing_is_on() which is a little
365 * slower, but accurate.
366 */
367int tracing_is_enabled(void)
368{
369	/*
370	 * For quick access (irqsoff uses this in fast path), just
371	 * return the mirror variable of the state of the ring buffer.
372	 * It's a little racy, but we don't really care.
373	 */
374	smp_rmb();
375	return !global_trace.buffer_disabled;
376}
377
378/*
379 * trace_buf_size is the size in bytes that is allocated
380 * for a buffer. Note, the number of bytes is always rounded
381 * to page size.
382 *
383 * This number is purposely set to a low number of 16384.
384 * If the dump on oops happens, it will be much appreciated
385 * to not have to wait for all that output. Anyway this can be
386 * boot time and run time configurable.
387 */
388#define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
389
390static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
391
392/* trace_types holds a link list of available tracers. */
393static struct tracer		*trace_types __read_mostly;
394
395/*
396 * trace_types_lock is used to protect the trace_types list.
397 */
398DEFINE_MUTEX(trace_types_lock);
399
400/*
401 * serialize the access of the ring buffer
402 *
403 * ring buffer serializes readers, but it is low level protection.
404 * The validity of the events (which returns by ring_buffer_peek() ..etc)
405 * are not protected by ring buffer.
406 *
407 * The content of events may become garbage if we allow other process consumes
408 * these events concurrently:
409 *   A) the page of the consumed events may become a normal page
410 *      (not reader page) in ring buffer, and this page will be rewrited
411 *      by events producer.
412 *   B) The page of the consumed events may become a page for splice_read,
413 *      and this page will be returned to system.
414 *
415 * These primitives allow multi process access to different cpu ring buffer
416 * concurrently.
417 *
418 * These primitives don't distinguish read-only and read-consume access.
419 * Multi read-only access are also serialized.
420 */
421
422#ifdef CONFIG_SMP
423static DECLARE_RWSEM(all_cpu_access_lock);
424static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
425
426static inline void trace_access_lock(int cpu)
427{
428	if (cpu == RING_BUFFER_ALL_CPUS) {
429		/* gain it for accessing the whole ring buffer. */
430		down_write(&all_cpu_access_lock);
431	} else {
432		/* gain it for accessing a cpu ring buffer. */
433
434		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
435		down_read(&all_cpu_access_lock);
436
437		/* Secondly block other access to this @cpu ring buffer. */
438		mutex_lock(&per_cpu(cpu_access_lock, cpu));
439	}
440}
441
442static inline void trace_access_unlock(int cpu)
443{
444	if (cpu == RING_BUFFER_ALL_CPUS) {
445		up_write(&all_cpu_access_lock);
446	} else {
447		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
448		up_read(&all_cpu_access_lock);
449	}
450}
451
452static inline void trace_access_lock_init(void)
453{
454	int cpu;
455
456	for_each_possible_cpu(cpu)
457		mutex_init(&per_cpu(cpu_access_lock, cpu));
458}
459
460#else
461
462static DEFINE_MUTEX(access_lock);
463
464static inline void trace_access_lock(int cpu)
465{
466	(void)cpu;
467	mutex_lock(&access_lock);
468}
469
470static inline void trace_access_unlock(int cpu)
471{
472	(void)cpu;
473	mutex_unlock(&access_lock);
474}
475
476static inline void trace_access_lock_init(void)
477{
478}
479
480#endif
481
482#ifdef CONFIG_STACKTRACE
483static void __ftrace_trace_stack(struct ring_buffer *buffer,
484				 unsigned long flags,
485				 int skip, int pc, struct pt_regs *regs);
486static inline void ftrace_trace_stack(struct trace_array *tr,
487				      struct ring_buffer *buffer,
488				      unsigned long flags,
489				      int skip, int pc, struct pt_regs *regs);
490
491#else
492static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
493					unsigned long flags,
494					int skip, int pc, struct pt_regs *regs)
495{
496}
497static inline void ftrace_trace_stack(struct trace_array *tr,
498				      struct ring_buffer *buffer,
499				      unsigned long flags,
500				      int skip, int pc, struct pt_regs *regs)
501{
502}
503
504#endif
505
506static void tracer_tracing_on(struct trace_array *tr)
507{
508	if (tr->trace_buffer.buffer)
509		ring_buffer_record_on(tr->trace_buffer.buffer);
510	/*
511	 * This flag is looked at when buffers haven't been allocated
512	 * yet, or by some tracers (like irqsoff), that just want to
513	 * know if the ring buffer has been disabled, but it can handle
514	 * races of where it gets disabled but we still do a record.
515	 * As the check is in the fast path of the tracers, it is more
516	 * important to be fast than accurate.
517	 */
518	tr->buffer_disabled = 0;
519	/* Make the flag seen by readers */
520	smp_wmb();
521}
522
523/**
524 * tracing_on - enable tracing buffers
525 *
526 * This function enables tracing buffers that may have been
527 * disabled with tracing_off.
528 */
529void tracing_on(void)
530{
531	tracer_tracing_on(&global_trace);
532}
533EXPORT_SYMBOL_GPL(tracing_on);
534
535/**
536 * __trace_puts - write a constant string into the trace buffer.
537 * @ip:	   The address of the caller
538 * @str:   The constant string to write
539 * @size:  The size of the string.
540 */
541int __trace_puts(unsigned long ip, const char *str, int size)
542{
543	struct ring_buffer_event *event;
544	struct ring_buffer *buffer;
545	struct print_entry *entry;
546	unsigned long irq_flags;
547	int alloc;
548	int pc;
549
550	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
551		return 0;
552
553	pc = preempt_count();
554
555	if (unlikely(tracing_selftest_running || tracing_disabled))
556		return 0;
557
558	alloc = sizeof(*entry) + size + 2; /* possible \n added */
559
560	local_save_flags(irq_flags);
561	buffer = global_trace.trace_buffer.buffer;
562	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
563					  irq_flags, pc);
564	if (!event)
565		return 0;
566
567	entry = ring_buffer_event_data(event);
568	entry->ip = ip;
569
570	memcpy(&entry->buf, str, size);
571
572	/* Add a newline if necessary */
573	if (entry->buf[size - 1] != '\n') {
574		entry->buf[size] = '\n';
575		entry->buf[size + 1] = '\0';
576	} else
577		entry->buf[size] = '\0';
578
579	__buffer_unlock_commit(buffer, event);
580	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
581
582	return size;
583}
584EXPORT_SYMBOL_GPL(__trace_puts);
585
586/**
587 * __trace_bputs - write the pointer to a constant string into trace buffer
588 * @ip:	   The address of the caller
589 * @str:   The constant string to write to the buffer to
590 */
591int __trace_bputs(unsigned long ip, const char *str)
592{
593	struct ring_buffer_event *event;
594	struct ring_buffer *buffer;
595	struct bputs_entry *entry;
596	unsigned long irq_flags;
597	int size = sizeof(struct bputs_entry);
598	int pc;
599
600	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
601		return 0;
602
603	pc = preempt_count();
604
605	if (unlikely(tracing_selftest_running || tracing_disabled))
606		return 0;
607
608	local_save_flags(irq_flags);
609	buffer = global_trace.trace_buffer.buffer;
610	event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
611					  irq_flags, pc);
612	if (!event)
613		return 0;
614
615	entry = ring_buffer_event_data(event);
616	entry->ip			= ip;
617	entry->str			= str;
618
619	__buffer_unlock_commit(buffer, event);
620	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
621
622	return 1;
623}
624EXPORT_SYMBOL_GPL(__trace_bputs);
625
626#ifdef CONFIG_TRACER_SNAPSHOT
627/**
628 * trace_snapshot - take a snapshot of the current buffer.
629 *
630 * This causes a swap between the snapshot buffer and the current live
631 * tracing buffer. You can use this to take snapshots of the live
632 * trace when some condition is triggered, but continue to trace.
633 *
634 * Note, make sure to allocate the snapshot with either
635 * a tracing_snapshot_alloc(), or by doing it manually
636 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
637 *
638 * If the snapshot buffer is not allocated, it will stop tracing.
639 * Basically making a permanent snapshot.
640 */
641void tracing_snapshot(void)
642{
643	struct trace_array *tr = &global_trace;
644	struct tracer *tracer = tr->current_trace;
645	unsigned long flags;
646
647	if (in_nmi()) {
648		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
649		internal_trace_puts("*** snapshot is being ignored        ***\n");
650		return;
651	}
652
653	if (!tr->allocated_snapshot) {
654		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
655		internal_trace_puts("*** stopping trace here!   ***\n");
656		tracing_off();
657		return;
658	}
659
660	/* Note, snapshot can not be used when the tracer uses it */
661	if (tracer->use_max_tr) {
662		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
663		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
664		return;
665	}
666
667	local_irq_save(flags);
668	update_max_tr(tr, current, smp_processor_id());
669	local_irq_restore(flags);
670}
671EXPORT_SYMBOL_GPL(tracing_snapshot);
672
673static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
674					struct trace_buffer *size_buf, int cpu_id);
675static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
676
677static int alloc_snapshot(struct trace_array *tr)
678{
679	int ret;
680
681	if (!tr->allocated_snapshot) {
682
683		/* allocate spare buffer */
684		ret = resize_buffer_duplicate_size(&tr->max_buffer,
685				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
686		if (ret < 0)
687			return ret;
688
689		tr->allocated_snapshot = true;
690	}
691
692	return 0;
693}
694
695static void free_snapshot(struct trace_array *tr)
696{
697	/*
698	 * We don't free the ring buffer. instead, resize it because
699	 * The max_tr ring buffer has some state (e.g. ring->clock) and
700	 * we want preserve it.
701	 */
702	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
703	set_buffer_entries(&tr->max_buffer, 1);
704	tracing_reset_online_cpus(&tr->max_buffer);
705	tr->allocated_snapshot = false;
706}
707
708/**
709 * tracing_alloc_snapshot - allocate snapshot buffer.
710 *
711 * This only allocates the snapshot buffer if it isn't already
712 * allocated - it doesn't also take a snapshot.
713 *
714 * This is meant to be used in cases where the snapshot buffer needs
715 * to be set up for events that can't sleep but need to be able to
716 * trigger a snapshot.
717 */
718int tracing_alloc_snapshot(void)
719{
720	struct trace_array *tr = &global_trace;
721	int ret;
722
723	ret = alloc_snapshot(tr);
724	WARN_ON(ret < 0);
725
726	return ret;
727}
728EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
729
730/**
731 * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
732 *
733 * This is similar to trace_snapshot(), but it will allocate the
734 * snapshot buffer if it isn't already allocated. Use this only
735 * where it is safe to sleep, as the allocation may sleep.
736 *
737 * This causes a swap between the snapshot buffer and the current live
738 * tracing buffer. You can use this to take snapshots of the live
739 * trace when some condition is triggered, but continue to trace.
740 */
741void tracing_snapshot_alloc(void)
742{
743	int ret;
744
745	ret = tracing_alloc_snapshot();
746	if (ret < 0)
747		return;
748
749	tracing_snapshot();
750}
751EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
752#else
753void tracing_snapshot(void)
754{
755	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
756}
757EXPORT_SYMBOL_GPL(tracing_snapshot);
758int tracing_alloc_snapshot(void)
759{
760	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
761	return -ENODEV;
762}
763EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
764void tracing_snapshot_alloc(void)
765{
766	/* Give warning */
767	tracing_snapshot();
768}
769EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
770#endif /* CONFIG_TRACER_SNAPSHOT */
771
772static void tracer_tracing_off(struct trace_array *tr)
773{
774	if (tr->trace_buffer.buffer)
775		ring_buffer_record_off(tr->trace_buffer.buffer);
776	/*
777	 * This flag is looked at when buffers haven't been allocated
778	 * yet, or by some tracers (like irqsoff), that just want to
779	 * know if the ring buffer has been disabled, but it can handle
780	 * races of where it gets disabled but we still do a record.
781	 * As the check is in the fast path of the tracers, it is more
782	 * important to be fast than accurate.
783	 */
784	tr->buffer_disabled = 1;
785	/* Make the flag seen by readers */
786	smp_wmb();
787}
788
789/**
790 * tracing_off - turn off tracing buffers
791 *
792 * This function stops the tracing buffers from recording data.
793 * It does not disable any overhead the tracers themselves may
794 * be causing. This function simply causes all recording to
795 * the ring buffers to fail.
796 */
797void tracing_off(void)
798{
799	tracer_tracing_off(&global_trace);
800}
801EXPORT_SYMBOL_GPL(tracing_off);
802
803void disable_trace_on_warning(void)
804{
805	if (__disable_trace_on_warning)
806		tracing_off();
807}
808
809/**
810 * tracer_tracing_is_on - show real state of ring buffer enabled
811 * @tr : the trace array to know if ring buffer is enabled
812 *
813 * Shows real state of the ring buffer if it is enabled or not.
814 */
815static int tracer_tracing_is_on(struct trace_array *tr)
816{
817	if (tr->trace_buffer.buffer)
818		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
819	return !tr->buffer_disabled;
820}
821
822/**
823 * tracing_is_on - show state of ring buffers enabled
824 */
825int tracing_is_on(void)
826{
827	return tracer_tracing_is_on(&global_trace);
828}
829EXPORT_SYMBOL_GPL(tracing_is_on);
830
831static int __init set_buf_size(char *str)
832{
833	unsigned long buf_size;
834
835	if (!str)
836		return 0;
837	buf_size = memparse(str, &str);
838	/* nr_entries can not be zero */
839	if (buf_size == 0)
840		return 0;
841	trace_buf_size = buf_size;
842	return 1;
843}
844__setup("trace_buf_size=", set_buf_size);
845
846static int __init set_tracing_thresh(char *str)
847{
848	unsigned long threshold;
849	int ret;
850
851	if (!str)
852		return 0;
853	ret = kstrtoul(str, 0, &threshold);
854	if (ret < 0)
855		return 0;
856	tracing_thresh = threshold * 1000;
857	return 1;
858}
859__setup("tracing_thresh=", set_tracing_thresh);
860
861unsigned long nsecs_to_usecs(unsigned long nsecs)
862{
863	return nsecs / 1000;
864}
865
866/*
867 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
868 * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
869 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
870 * of strings in the order that the enums were defined.
871 */
872#undef C
873#define C(a, b) b
874
875/* These must match the bit postions in trace_iterator_flags */
876static const char *trace_options[] = {
877	TRACE_FLAGS
878	NULL
879};
880
881static struct {
882	u64 (*func)(void);
883	const char *name;
884	int in_ns;		/* is this clock in nanoseconds? */
885} trace_clocks[] = {
886	{ trace_clock_local,		"local",	1 },
887	{ trace_clock_global,		"global",	1 },
888	{ trace_clock_counter,		"counter",	0 },
889	{ trace_clock_jiffies,		"uptime",	0 },
890	{ trace_clock,			"perf",		1 },
891	{ ktime_get_mono_fast_ns,	"mono",		1 },
892	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
893	ARCH_TRACE_CLOCKS
894};
895
896/*
897 * trace_parser_get_init - gets the buffer for trace parser
898 */
899int trace_parser_get_init(struct trace_parser *parser, int size)
900{
901	memset(parser, 0, sizeof(*parser));
902
903	parser->buffer = kmalloc(size, GFP_KERNEL);
904	if (!parser->buffer)
905		return 1;
906
907	parser->size = size;
908	return 0;
909}
910
911/*
912 * trace_parser_put - frees the buffer for trace parser
913 */
914void trace_parser_put(struct trace_parser *parser)
915{
916	kfree(parser->buffer);
917}
918
919/*
920 * trace_get_user - reads the user input string separated by  space
921 * (matched by isspace(ch))
922 *
923 * For each string found the 'struct trace_parser' is updated,
924 * and the function returns.
925 *
926 * Returns number of bytes read.
927 *
928 * See kernel/trace/trace.h for 'struct trace_parser' details.
929 */
930int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
931	size_t cnt, loff_t *ppos)
932{
933	char ch;
934	size_t read = 0;
935	ssize_t ret;
936
937	if (!*ppos)
938		trace_parser_clear(parser);
939
940	ret = get_user(ch, ubuf++);
941	if (ret)
942		goto out;
943
944	read++;
945	cnt--;
946
947	/*
948	 * The parser is not finished with the last write,
949	 * continue reading the user input without skipping spaces.
950	 */
951	if (!parser->cont) {
952		/* skip white space */
953		while (cnt && isspace(ch)) {
954			ret = get_user(ch, ubuf++);
955			if (ret)
956				goto out;
957			read++;
958			cnt--;
959		}
960
961		/* only spaces were written */
962		if (isspace(ch)) {
963			*ppos += read;
964			ret = read;
965			goto out;
966		}
967
968		parser->idx = 0;
969	}
970
971	/* read the non-space input */
972	while (cnt && !isspace(ch)) {
973		if (parser->idx < parser->size - 1)
974			parser->buffer[parser->idx++] = ch;
975		else {
976			ret = -EINVAL;
977			goto out;
978		}
979		ret = get_user(ch, ubuf++);
980		if (ret)
981			goto out;
982		read++;
983		cnt--;
984	}
985
986	/* We either got finished input or we have to wait for another call. */
987	if (isspace(ch)) {
988		parser->buffer[parser->idx] = 0;
989		parser->cont = false;
990	} else if (parser->idx < parser->size - 1) {
991		parser->cont = true;
992		parser->buffer[parser->idx++] = ch;
993	} else {
994		ret = -EINVAL;
995		goto out;
996	}
997
998	*ppos += read;
999	ret = read;
1000
1001out:
1002	return ret;
1003}
1004
1005/* TODO add a seq_buf_to_buffer() */
1006static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1007{
1008	int len;
1009
1010	if (trace_seq_used(s) <= s->seq.readpos)
1011		return -EBUSY;
1012
1013	len = trace_seq_used(s) - s->seq.readpos;
1014	if (cnt > len)
1015		cnt = len;
1016	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1017
1018	s->seq.readpos += cnt;
1019	return cnt;
1020}
1021
1022unsigned long __read_mostly	tracing_thresh;
1023
1024#ifdef CONFIG_TRACER_MAX_TRACE
1025/*
1026 * Copy the new maximum trace into the separate maximum-trace
1027 * structure. (this way the maximum trace is permanently saved,
1028 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1029 */
1030static void
1031__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1032{
1033	struct trace_buffer *trace_buf = &tr->trace_buffer;
1034	struct trace_buffer *max_buf = &tr->max_buffer;
1035	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1036	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1037
1038	max_buf->cpu = cpu;
1039	max_buf->time_start = data->preempt_timestamp;
1040
1041	max_data->saved_latency = tr->max_latency;
1042	max_data->critical_start = data->critical_start;
1043	max_data->critical_end = data->critical_end;
1044
1045	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1046	max_data->pid = tsk->pid;
1047	/*
1048	 * If tsk == current, then use current_uid(), as that does not use
1049	 * RCU. The irq tracer can be called out of RCU scope.
1050	 */
1051	if (tsk == current)
1052		max_data->uid = current_uid();
1053	else
1054		max_data->uid = task_uid(tsk);
1055
1056	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1057	max_data->policy = tsk->policy;
1058	max_data->rt_priority = tsk->rt_priority;
1059
1060	/* record this tasks comm */
1061	tracing_record_cmdline(tsk);
1062}
1063
1064/**
1065 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1066 * @tr: tracer
1067 * @tsk: the task with the latency
1068 * @cpu: The cpu that initiated the trace.
1069 *
1070 * Flip the buffers between the @tr and the max_tr and record information
1071 * about which task was the cause of this latency.
1072 */
1073void
1074update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1075{
1076	struct ring_buffer *buf;
1077
1078	if (tr->stop_count)
1079		return;
1080
1081	WARN_ON_ONCE(!irqs_disabled());
1082
1083	if (!tr->allocated_snapshot) {
1084		/* Only the nop tracer should hit this when disabling */
1085		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1086		return;
1087	}
1088
1089	arch_spin_lock(&tr->max_lock);
1090
1091	buf = tr->trace_buffer.buffer;
1092	tr->trace_buffer.buffer = tr->max_buffer.buffer;
1093	tr->max_buffer.buffer = buf;
1094
1095	__update_max_tr(tr, tsk, cpu);
1096	arch_spin_unlock(&tr->max_lock);
1097}
1098
1099/**
1100 * update_max_tr_single - only copy one trace over, and reset the rest
1101 * @tr - tracer
1102 * @tsk - task with the latency
1103 * @cpu - the cpu of the buffer to copy.
1104 *
1105 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1106 */
1107void
1108update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1109{
1110	int ret;
1111
1112	if (tr->stop_count)
1113		return;
1114
1115	WARN_ON_ONCE(!irqs_disabled());
1116	if (!tr->allocated_snapshot) {
1117		/* Only the nop tracer should hit this when disabling */
1118		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1119		return;
1120	}
1121
1122	arch_spin_lock(&tr->max_lock);
1123
1124	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1125
1126	if (ret == -EBUSY) {
1127		/*
1128		 * We failed to swap the buffer due to a commit taking
1129		 * place on this CPU. We fail to record, but we reset
1130		 * the max trace buffer (no one writes directly to it)
1131		 * and flag that it failed.
1132		 */
1133		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1134			"Failed to swap buffers due to commit in progress\n");
1135	}
1136
1137	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1138
1139	__update_max_tr(tr, tsk, cpu);
1140	arch_spin_unlock(&tr->max_lock);
1141}
1142#endif /* CONFIG_TRACER_MAX_TRACE */
1143
1144static int wait_on_pipe(struct trace_iterator *iter, bool full)
1145{
1146	/* Iterators are static, they should be filled or empty */
1147	if (trace_buffer_iter(iter, iter->cpu_file))
1148		return 0;
1149
1150	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1151				full);
1152}
1153
1154#ifdef CONFIG_FTRACE_STARTUP_TEST
1155static int run_tracer_selftest(struct tracer *type)
1156{
1157	struct trace_array *tr = &global_trace;
1158	struct tracer *saved_tracer = tr->current_trace;
1159	int ret;
1160
1161	if (!type->selftest || tracing_selftest_disabled)
1162		return 0;
1163
1164	/*
1165	 * Run a selftest on this tracer.
1166	 * Here we reset the trace buffer, and set the current
1167	 * tracer to be this tracer. The tracer can then run some
1168	 * internal tracing to verify that everything is in order.
1169	 * If we fail, we do not register this tracer.
1170	 */
1171	tracing_reset_online_cpus(&tr->trace_buffer);
1172
1173	tr->current_trace = type;
1174
1175#ifdef CONFIG_TRACER_MAX_TRACE
1176	if (type->use_max_tr) {
1177		/* If we expanded the buffers, make sure the max is expanded too */
1178		if (ring_buffer_expanded)
1179			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1180					   RING_BUFFER_ALL_CPUS);
1181		tr->allocated_snapshot = true;
1182	}
1183#endif
1184
1185	/* the test is responsible for initializing and enabling */
1186	pr_info("Testing tracer %s: ", type->name);
1187	ret = type->selftest(type, tr);
1188	/* the test is responsible for resetting too */
1189	tr->current_trace = saved_tracer;
1190	if (ret) {
1191		printk(KERN_CONT "FAILED!\n");
1192		/* Add the warning after printing 'FAILED' */
1193		WARN_ON(1);
1194		return -1;
1195	}
1196	/* Only reset on passing, to avoid touching corrupted buffers */
1197	tracing_reset_online_cpus(&tr->trace_buffer);
1198
1199#ifdef CONFIG_TRACER_MAX_TRACE
1200	if (type->use_max_tr) {
1201		tr->allocated_snapshot = false;
1202
1203		/* Shrink the max buffer again */
1204		if (ring_buffer_expanded)
1205			ring_buffer_resize(tr->max_buffer.buffer, 1,
1206					   RING_BUFFER_ALL_CPUS);
1207	}
1208#endif
1209
1210	printk(KERN_CONT "PASSED\n");
1211	return 0;
1212}
1213#else
1214static inline int run_tracer_selftest(struct tracer *type)
1215{
1216	return 0;
1217}
1218#endif /* CONFIG_FTRACE_STARTUP_TEST */
1219
1220static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1221
1222static void __init apply_trace_boot_options(void);
1223
1224/**
1225 * register_tracer - register a tracer with the ftrace system.
1226 * @type - the plugin for the tracer
1227 *
1228 * Register a new plugin tracer.
1229 */
1230int __init register_tracer(struct tracer *type)
1231{
1232	struct tracer *t;
1233	int ret = 0;
1234
1235	if (!type->name) {
1236		pr_info("Tracer must have a name\n");
1237		return -1;
1238	}
1239
1240	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1241		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1242		return -1;
1243	}
1244
1245	mutex_lock(&trace_types_lock);
1246
1247	tracing_selftest_running = true;
1248
1249	for (t = trace_types; t; t = t->next) {
1250		if (strcmp(type->name, t->name) == 0) {
1251			/* already found */
1252			pr_info("Tracer %s already registered\n",
1253				type->name);
1254			ret = -1;
1255			goto out;
1256		}
1257	}
1258
1259	if (!type->set_flag)
1260		type->set_flag = &dummy_set_flag;
1261	if (!type->flags)
1262		type->flags = &dummy_tracer_flags;
1263	else
1264		if (!type->flags->opts)
1265			type->flags->opts = dummy_tracer_opt;
1266
1267	ret = run_tracer_selftest(type);
1268	if (ret < 0)
1269		goto out;
1270
1271	type->next = trace_types;
1272	trace_types = type;
1273	add_tracer_options(&global_trace, type);
1274
1275 out:
1276	tracing_selftest_running = false;
1277	mutex_unlock(&trace_types_lock);
1278
1279	if (ret || !default_bootup_tracer)
1280		goto out_unlock;
1281
1282	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1283		goto out_unlock;
1284
1285	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1286	/* Do we want this tracer to start on bootup? */
1287	tracing_set_tracer(&global_trace, type->name);
1288	default_bootup_tracer = NULL;
1289
1290	apply_trace_boot_options();
1291
1292	/* disable other selftests, since this will break it. */
1293	tracing_selftest_disabled = true;
1294#ifdef CONFIG_FTRACE_STARTUP_TEST
1295	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1296	       type->name);
1297#endif
1298
1299 out_unlock:
1300	return ret;
1301}
1302
1303void tracing_reset(struct trace_buffer *buf, int cpu)
1304{
1305	struct ring_buffer *buffer = buf->buffer;
1306
1307	if (!buffer)
1308		return;
1309
1310	ring_buffer_record_disable(buffer);
1311
1312	/* Make sure all commits have finished */
1313	synchronize_sched();
1314	ring_buffer_reset_cpu(buffer, cpu);
1315
1316	ring_buffer_record_enable(buffer);
1317}
1318
1319void tracing_reset_online_cpus(struct trace_buffer *buf)
1320{
1321	struct ring_buffer *buffer = buf->buffer;
1322	int cpu;
1323
1324	if (!buffer)
1325		return;
1326
1327	ring_buffer_record_disable(buffer);
1328
1329	/* Make sure all commits have finished */
1330	synchronize_sched();
1331
1332	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1333
1334	for_each_online_cpu(cpu)
1335		ring_buffer_reset_cpu(buffer, cpu);
1336
1337	ring_buffer_record_enable(buffer);
1338}
1339
1340/* Must have trace_types_lock held */
1341void tracing_reset_all_online_cpus(void)
1342{
1343	struct trace_array *tr;
1344
1345	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1346		tracing_reset_online_cpus(&tr->trace_buffer);
1347#ifdef CONFIG_TRACER_MAX_TRACE
1348		tracing_reset_online_cpus(&tr->max_buffer);
1349#endif
1350	}
1351}
1352
1353#define SAVED_CMDLINES_DEFAULT 128
1354#define NO_CMDLINE_MAP UINT_MAX
1355static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1356struct saved_cmdlines_buffer {
1357	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1358	unsigned *map_cmdline_to_pid;
1359	unsigned cmdline_num;
1360	int cmdline_idx;
1361	char *saved_cmdlines;
1362};
1363static struct saved_cmdlines_buffer *savedcmd;
1364
1365/* temporary disable recording */
1366static atomic_t trace_record_cmdline_disabled __read_mostly;
1367
1368static inline char *get_saved_cmdlines(int idx)
1369{
1370	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1371}
1372
1373static inline void set_cmdline(int idx, const char *cmdline)
1374{
1375	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1376}
1377
1378static int allocate_cmdlines_buffer(unsigned int val,
1379				    struct saved_cmdlines_buffer *s)
1380{
1381	s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1382					GFP_KERNEL);
1383	if (!s->map_cmdline_to_pid)
1384		return -ENOMEM;
1385
1386	s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1387	if (!s->saved_cmdlines) {
1388		kfree(s->map_cmdline_to_pid);
1389		return -ENOMEM;
1390	}
1391
1392	s->cmdline_idx = 0;
1393	s->cmdline_num = val;
1394	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1395	       sizeof(s->map_pid_to_cmdline));
1396	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1397	       val * sizeof(*s->map_cmdline_to_pid));
1398
1399	return 0;
1400}
1401
1402static int trace_create_savedcmd(void)
1403{
1404	int ret;
1405
1406	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1407	if (!savedcmd)
1408		return -ENOMEM;
1409
1410	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1411	if (ret < 0) {
1412		kfree(savedcmd);
1413		savedcmd = NULL;
1414		return -ENOMEM;
1415	}
1416
1417	return 0;
1418}
1419
1420int is_tracing_stopped(void)
1421{
1422	return global_trace.stop_count;
1423}
1424
1425/**
1426 * tracing_start - quick start of the tracer
1427 *
1428 * If tracing is enabled but was stopped by tracing_stop,
1429 * this will start the tracer back up.
1430 */
1431void tracing_start(void)
1432{
1433	struct ring_buffer *buffer;
1434	unsigned long flags;
1435
1436	if (tracing_disabled)
1437		return;
1438
1439	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1440	if (--global_trace.stop_count) {
1441		if (global_trace.stop_count < 0) {
1442			/* Someone screwed up their debugging */
1443			WARN_ON_ONCE(1);
1444			global_trace.stop_count = 0;
1445		}
1446		goto out;
1447	}
1448
1449	/* Prevent the buffers from switching */
1450	arch_spin_lock(&global_trace.max_lock);
1451
1452	buffer = global_trace.trace_buffer.buffer;
1453	if (buffer)
1454		ring_buffer_record_enable(buffer);
1455
1456#ifdef CONFIG_TRACER_MAX_TRACE
1457	buffer = global_trace.max_buffer.buffer;
1458	if (buffer)
1459		ring_buffer_record_enable(buffer);
1460#endif
1461
1462	arch_spin_unlock(&global_trace.max_lock);
1463
1464 out:
1465	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1466}
1467
1468static void tracing_start_tr(struct trace_array *tr)
1469{
1470	struct ring_buffer *buffer;
1471	unsigned long flags;
1472
1473	if (tracing_disabled)
1474		return;
1475
1476	/* If global, we need to also start the max tracer */
1477	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1478		return tracing_start();
1479
1480	raw_spin_lock_irqsave(&tr->start_lock, flags);
1481
1482	if (--tr->stop_count) {
1483		if (tr->stop_count < 0) {
1484			/* Someone screwed up their debugging */
1485			WARN_ON_ONCE(1);
1486			tr->stop_count = 0;
1487		}
1488		goto out;
1489	}
1490
1491	buffer = tr->trace_buffer.buffer;
1492	if (buffer)
1493		ring_buffer_record_enable(buffer);
1494
1495 out:
1496	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1497}
1498
1499/**
1500 * tracing_stop - quick stop of the tracer
1501 *
1502 * Light weight way to stop tracing. Use in conjunction with
1503 * tracing_start.
1504 */
1505void tracing_stop(void)
1506{
1507	struct ring_buffer *buffer;
1508	unsigned long flags;
1509
1510	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1511	if (global_trace.stop_count++)
1512		goto out;
1513
1514	/* Prevent the buffers from switching */
1515	arch_spin_lock(&global_trace.max_lock);
1516
1517	buffer = global_trace.trace_buffer.buffer;
1518	if (buffer)
1519		ring_buffer_record_disable(buffer);
1520
1521#ifdef CONFIG_TRACER_MAX_TRACE
1522	buffer = global_trace.max_buffer.buffer;
1523	if (buffer)
1524		ring_buffer_record_disable(buffer);
1525#endif
1526
1527	arch_spin_unlock(&global_trace.max_lock);
1528
1529 out:
1530	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1531}
1532
1533static void tracing_stop_tr(struct trace_array *tr)
1534{
1535	struct ring_buffer *buffer;
1536	unsigned long flags;
1537
1538	/* If global, we need to also stop the max tracer */
1539	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1540		return tracing_stop();
1541
1542	raw_spin_lock_irqsave(&tr->start_lock, flags);
1543	if (tr->stop_count++)
1544		goto out;
1545
1546	buffer = tr->trace_buffer.buffer;
1547	if (buffer)
1548		ring_buffer_record_disable(buffer);
1549
1550 out:
1551	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1552}
1553
1554void trace_stop_cmdline_recording(void);
1555
1556static int trace_save_cmdline(struct task_struct *tsk)
1557{
1558	unsigned pid, idx;
1559
1560	if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1561		return 0;
1562
1563	/*
1564	 * It's not the end of the world if we don't get
1565	 * the lock, but we also don't want to spin
1566	 * nor do we want to disable interrupts,
1567	 * so if we miss here, then better luck next time.
1568	 */
1569	if (!arch_spin_trylock(&trace_cmdline_lock))
1570		return 0;
1571
1572	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1573	if (idx == NO_CMDLINE_MAP) {
1574		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1575
1576		/*
1577		 * Check whether the cmdline buffer at idx has a pid
1578		 * mapped. We are going to overwrite that entry so we
1579		 * need to clear the map_pid_to_cmdline. Otherwise we
1580		 * would read the new comm for the old pid.
1581		 */
1582		pid = savedcmd->map_cmdline_to_pid[idx];
1583		if (pid != NO_CMDLINE_MAP)
1584			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1585
1586		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1587		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1588
1589		savedcmd->cmdline_idx = idx;
1590	}
1591
1592	set_cmdline(idx, tsk->comm);
1593
1594	arch_spin_unlock(&trace_cmdline_lock);
1595
1596	return 1;
1597}
1598
1599static void __trace_find_cmdline(int pid, char comm[])
1600{
1601	unsigned map;
1602
1603	if (!pid) {
1604		strcpy(comm, "<idle>");
1605		return;
1606	}
1607
1608	if (WARN_ON_ONCE(pid < 0)) {
1609		strcpy(comm, "<XXX>");
1610		return;
1611	}
1612
1613	if (pid > PID_MAX_DEFAULT) {
1614		strcpy(comm, "<...>");
1615		return;
1616	}
1617
1618	map = savedcmd->map_pid_to_cmdline[pid];
1619	if (map != NO_CMDLINE_MAP)
1620		strcpy(comm, get_saved_cmdlines(map));
1621	else
1622		strcpy(comm, "<...>");
1623}
1624
1625void trace_find_cmdline(int pid, char comm[])
1626{
1627	preempt_disable();
1628	arch_spin_lock(&trace_cmdline_lock);
1629
1630	__trace_find_cmdline(pid, comm);
1631
1632	arch_spin_unlock(&trace_cmdline_lock);
1633	preempt_enable();
1634}
1635
1636void tracing_record_cmdline(struct task_struct *tsk)
1637{
1638	if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1639		return;
1640
1641	if (!__this_cpu_read(trace_cmdline_save))
1642		return;
1643
1644	if (trace_save_cmdline(tsk))
1645		__this_cpu_write(trace_cmdline_save, false);
1646}
1647
1648void
1649tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1650			     int pc)
1651{
1652	struct task_struct *tsk = current;
1653
1654	entry->preempt_count		= pc & 0xff;
1655	entry->pid			= (tsk) ? tsk->pid : 0;
1656	entry->flags =
1657#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1658		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1659#else
1660		TRACE_FLAG_IRQS_NOSUPPORT |
1661#endif
1662		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1663		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1664		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1665		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1666}
1667EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1668
1669struct ring_buffer_event *
1670trace_buffer_lock_reserve(struct ring_buffer *buffer,
1671			  int type,
1672			  unsigned long len,
1673			  unsigned long flags, int pc)
1674{
1675	struct ring_buffer_event *event;
1676
1677	event = ring_buffer_lock_reserve(buffer, len);
1678	if (event != NULL) {
1679		struct trace_entry *ent = ring_buffer_event_data(event);
1680
1681		tracing_generic_entry_update(ent, flags, pc);
1682		ent->type = type;
1683	}
1684
1685	return event;
1686}
1687
1688void
1689__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1690{
1691	__this_cpu_write(trace_cmdline_save, true);
1692	ring_buffer_unlock_commit(buffer, event);
1693}
1694
1695void trace_buffer_unlock_commit(struct trace_array *tr,
1696				struct ring_buffer *buffer,
1697				struct ring_buffer_event *event,
1698				unsigned long flags, int pc)
1699{
1700	__buffer_unlock_commit(buffer, event);
1701
1702	ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
1703	ftrace_trace_userstack(buffer, flags, pc);
1704}
1705EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1706
1707static struct ring_buffer *temp_buffer;
1708
1709struct ring_buffer_event *
1710trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1711			  struct trace_event_file *trace_file,
1712			  int type, unsigned long len,
1713			  unsigned long flags, int pc)
1714{
1715	struct ring_buffer_event *entry;
1716
1717	*current_rb = trace_file->tr->trace_buffer.buffer;
1718	entry = trace_buffer_lock_reserve(*current_rb,
1719					 type, len, flags, pc);
1720	/*
1721	 * If tracing is off, but we have triggers enabled
1722	 * we still need to look at the event data. Use the temp_buffer
1723	 * to store the trace event for the tigger to use. It's recusive
1724	 * safe and will not be recorded anywhere.
1725	 */
1726	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
1727		*current_rb = temp_buffer;
1728		entry = trace_buffer_lock_reserve(*current_rb,
1729						  type, len, flags, pc);
1730	}
1731	return entry;
1732}
1733EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1734
1735struct ring_buffer_event *
1736trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1737				  int type, unsigned long len,
1738				  unsigned long flags, int pc)
1739{
1740	*current_rb = global_trace.trace_buffer.buffer;
1741	return trace_buffer_lock_reserve(*current_rb,
1742					 type, len, flags, pc);
1743}
1744EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1745
1746void trace_buffer_unlock_commit_regs(struct trace_array *tr,
1747				     struct ring_buffer *buffer,
1748				     struct ring_buffer_event *event,
1749				     unsigned long flags, int pc,
1750				     struct pt_regs *regs)
1751{
1752	__buffer_unlock_commit(buffer, event);
1753
1754	ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
1755	ftrace_trace_userstack(buffer, flags, pc);
1756}
1757EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1758
1759void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1760					 struct ring_buffer_event *event)
1761{
1762	ring_buffer_discard_commit(buffer, event);
1763}
1764EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1765
1766void
1767trace_function(struct trace_array *tr,
1768	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
1769	       int pc)
1770{
1771	struct trace_event_call *call = &event_function;
1772	struct ring_buffer *buffer = tr->trace_buffer.buffer;
1773	struct ring_buffer_event *event;
1774	struct ftrace_entry *entry;
1775
1776	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1777					  flags, pc);
1778	if (!event)
1779		return;
1780	entry	= ring_buffer_event_data(event);
1781	entry->ip			= ip;
1782	entry->parent_ip		= parent_ip;
1783
1784	if (!call_filter_check_discard(call, entry, buffer, event))
1785		__buffer_unlock_commit(buffer, event);
1786}
1787
1788#ifdef CONFIG_STACKTRACE
1789
1790#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1791struct ftrace_stack {
1792	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
1793};
1794
1795static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1796static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1797
1798static void __ftrace_trace_stack(struct ring_buffer *buffer,
1799				 unsigned long flags,
1800				 int skip, int pc, struct pt_regs *regs)
1801{
1802	struct trace_event_call *call = &event_kernel_stack;
1803	struct ring_buffer_event *event;
1804	struct stack_entry *entry;
1805	struct stack_trace trace;
1806	int use_stack;
1807	int size = FTRACE_STACK_ENTRIES;
1808
1809	trace.nr_entries	= 0;
1810	trace.skip		= skip;
1811
1812	/*
1813	 * Since events can happen in NMIs there's no safe way to
1814	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1815	 * or NMI comes in, it will just have to use the default
1816	 * FTRACE_STACK_SIZE.
1817	 */
1818	preempt_disable_notrace();
1819
1820	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1821	/*
1822	 * We don't need any atomic variables, just a barrier.
1823	 * If an interrupt comes in, we don't care, because it would
1824	 * have exited and put the counter back to what we want.
1825	 * We just need a barrier to keep gcc from moving things
1826	 * around.
1827	 */
1828	barrier();
1829	if (use_stack == 1) {
1830		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
1831		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
1832
1833		if (regs)
1834			save_stack_trace_regs(regs, &trace);
1835		else
1836			save_stack_trace(&trace);
1837
1838		if (trace.nr_entries > size)
1839			size = trace.nr_entries;
1840	} else
1841		/* From now on, use_stack is a boolean */
1842		use_stack = 0;
1843
1844	size *= sizeof(unsigned long);
1845
1846	event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1847					  sizeof(*entry) + size, flags, pc);
1848	if (!event)
1849		goto out;
1850	entry = ring_buffer_event_data(event);
1851
1852	memset(&entry->caller, 0, size);
1853
1854	if (use_stack)
1855		memcpy(&entry->caller, trace.entries,
1856		       trace.nr_entries * sizeof(unsigned long));
1857	else {
1858		trace.max_entries	= FTRACE_STACK_ENTRIES;
1859		trace.entries		= entry->caller;
1860		if (regs)
1861			save_stack_trace_regs(regs, &trace);
1862		else
1863			save_stack_trace(&trace);
1864	}
1865
1866	entry->size = trace.nr_entries;
1867
1868	if (!call_filter_check_discard(call, entry, buffer, event))
1869		__buffer_unlock_commit(buffer, event);
1870
1871 out:
1872	/* Again, don't let gcc optimize things here */
1873	barrier();
1874	__this_cpu_dec(ftrace_stack_reserve);
1875	preempt_enable_notrace();
1876
1877}
1878
1879static inline void ftrace_trace_stack(struct trace_array *tr,
1880				      struct ring_buffer *buffer,
1881				      unsigned long flags,
1882				      int skip, int pc, struct pt_regs *regs)
1883{
1884	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
1885		return;
1886
1887	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
1888}
1889
1890void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1891		   int pc)
1892{
1893	__ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1894}
1895
1896/**
1897 * trace_dump_stack - record a stack back trace in the trace buffer
1898 * @skip: Number of functions to skip (helper handlers)
1899 */
1900void trace_dump_stack(int skip)
1901{
1902	unsigned long flags;
1903
1904	if (tracing_disabled || tracing_selftest_running)
1905		return;
1906
1907	local_save_flags(flags);
1908
1909	/*
1910	 * Skip 3 more, seems to get us at the caller of
1911	 * this function.
1912	 */
1913	skip += 3;
1914	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
1915			     flags, skip, preempt_count(), NULL);
1916}
1917
1918static DEFINE_PER_CPU(int, user_stack_count);
1919
1920void
1921ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1922{
1923	struct trace_event_call *call = &event_user_stack;
1924	struct ring_buffer_event *event;
1925	struct userstack_entry *entry;
1926	struct stack_trace trace;
1927
1928	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
1929		return;
1930
1931	/*
1932	 * NMIs can not handle page faults, even with fix ups.
1933	 * The save user stack can (and often does) fault.
1934	 */
1935	if (unlikely(in_nmi()))
1936		return;
1937
1938	/*
1939	 * prevent recursion, since the user stack tracing may
1940	 * trigger other kernel events.
1941	 */
1942	preempt_disable();
1943	if (__this_cpu_read(user_stack_count))
1944		goto out;
1945
1946	__this_cpu_inc(user_stack_count);
1947
1948	event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1949					  sizeof(*entry), flags, pc);
1950	if (!event)
1951		goto out_drop_count;
1952	entry	= ring_buffer_event_data(event);
1953
1954	entry->tgid		= current->tgid;
1955	memset(&entry->caller, 0, sizeof(entry->caller));
1956
1957	trace.nr_entries	= 0;
1958	trace.max_entries	= FTRACE_STACK_ENTRIES;
1959	trace.skip		= 0;
1960	trace.entries		= entry->caller;
1961
1962	save_stack_trace_user(&trace);
1963	if (!call_filter_check_discard(call, entry, buffer, event))
1964		__buffer_unlock_commit(buffer, event);
1965
1966 out_drop_count:
1967	__this_cpu_dec(user_stack_count);
1968 out:
1969	preempt_enable();
1970}
1971
1972#ifdef UNUSED
1973static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1974{
1975	ftrace_trace_userstack(tr, flags, preempt_count());
1976}
1977#endif /* UNUSED */
1978
1979#endif /* CONFIG_STACKTRACE */
1980
1981/* created for use with alloc_percpu */
1982struct trace_buffer_struct {
1983	char buffer[TRACE_BUF_SIZE];
1984};
1985
1986static struct trace_buffer_struct *trace_percpu_buffer;
1987static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1988static struct trace_buffer_struct *trace_percpu_irq_buffer;
1989static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1990
1991 /*
1992 * The buffer used is dependent on the context. There is a per cpu
1993 * buffer for normal context, softirq context, hard irq context and
1994 * for NMI context. This allows for lockless recording.
1995 *
1996 * Note, if the buffers failed to be allocated, then this returns NULL.
1997 */
1998static char *get_trace_buf(void)
1999{
2000	struct trace_buffer_struct *percpu_buffer;
2001
2002	/*
2003	 * If we have allocated per cpu buffers, then we do not
2004	 * need to do any locking.
2005	 */
2006	if (in_nmi())
2007		percpu_buffer = trace_percpu_nmi_buffer;
2008	else if (in_irq())
2009		percpu_buffer = trace_percpu_irq_buffer;
2010	else if (in_softirq())
2011		percpu_buffer = trace_percpu_sirq_buffer;
2012	else
2013		percpu_buffer = trace_percpu_buffer;
2014
2015	if (!percpu_buffer)
2016		return NULL;
2017
2018	return this_cpu_ptr(&percpu_buffer->buffer[0]);
2019}
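
/*
 * Illustrative sketch of the expected calling pattern (mirroring
 * trace_vbprintk() below, not a separate API): preemption must be
 * disabled around the use of the returned per-cpu buffer:
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf)
 *		len = vscnprintf(buf, TRACE_BUF_SIZE, fmt, args);
 *	preempt_enable_notrace();
 */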
2020
2021static int alloc_percpu_trace_buffer(void)
2022{
2023	struct trace_buffer_struct *buffers;
2024	struct trace_buffer_struct *sirq_buffers;
2025	struct trace_buffer_struct *irq_buffers;
2026	struct trace_buffer_struct *nmi_buffers;
2027
2028	buffers = alloc_percpu(struct trace_buffer_struct);
2029	if (!buffers)
2030		goto err_warn;
2031
2032	sirq_buffers = alloc_percpu(struct trace_buffer_struct);
2033	if (!sirq_buffers)
2034		goto err_sirq;
2035
2036	irq_buffers = alloc_percpu(struct trace_buffer_struct);
2037	if (!irq_buffers)
2038		goto err_irq;
2039
2040	nmi_buffers = alloc_percpu(struct trace_buffer_struct);
2041	if (!nmi_buffers)
2042		goto err_nmi;
2043
2044	trace_percpu_buffer = buffers;
2045	trace_percpu_sirq_buffer = sirq_buffers;
2046	trace_percpu_irq_buffer = irq_buffers;
2047	trace_percpu_nmi_buffer = nmi_buffers;
2048
2049	return 0;
2050
2051 err_nmi:
2052	free_percpu(irq_buffers);
2053 err_irq:
2054	free_percpu(sirq_buffers);
2055 err_sirq:
2056	free_percpu(buffers);
2057 err_warn:
2058	WARN(1, "Could not allocate percpu trace_printk buffer");
2059	return -ENOMEM;
2060}
2061
2062static int buffers_allocated;
2063
2064void trace_printk_init_buffers(void)
2065{
2066	if (buffers_allocated)
2067		return;
2068
2069	if (alloc_percpu_trace_buffer())
2070		return;
2071
2072	/* trace_printk() is for debug use only. Don't use it in production. */
2073
2074	pr_warning("\n");
2075	pr_warning("**********************************************************\n");
2076	pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2077	pr_warning("**                                                      **\n");
2078	pr_warning("** trace_printk() being used. Allocating extra memory.  **\n");
2079	pr_warning("**                                                      **\n");
2080	pr_warning("** This means that this is a DEBUG kernel and it is     **\n");
2081	pr_warning("** unsafe for production use.                           **\n");
2082	pr_warning("**                                                      **\n");
2083	pr_warning("** If you see this message and you are not debugging    **\n");
2084	pr_warning("** the kernel, report this immediately to your vendor!  **\n");
2085	pr_warning("**                                                      **\n");
2086	pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2087	pr_warning("**********************************************************\n");
2088
2089	/* Expand the buffers to set size */
2090	tracing_update_buffers();
2091
2092	buffers_allocated = 1;
2093
2094	/*
2095	 * trace_printk_init_buffers() can be called by modules.
2096	 * If that happens, then we need to start cmdline recording
2097	 * directly here. If the global_trace.trace_buffer.buffer is
2098	 * already allocated, then this was called by module code.
2099	 */
2100	if (global_trace.trace_buffer.buffer)
2101		tracing_start_cmdline_record();
2102}
2103
2104void trace_printk_start_comm(void)
2105{
2106	/* Start tracing comms if trace printk is set */
2107	if (!buffers_allocated)
2108		return;
2109	tracing_start_cmdline_record();
2110}
2111
2112static void trace_printk_start_stop_comm(int enabled)
2113{
2114	if (!buffers_allocated)
2115		return;
2116
2117	if (enabled)
2118		tracing_start_cmdline_record();
2119	else
2120		tracing_stop_cmdline_record();
2121}
2122
2123/**
2124 * trace_vbprintk - write binary msg to tracing buffer
2125 * @ip: caller address, @fmt: format string, @args: arguments for @fmt
2126 */
2127int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2128{
2129	struct trace_event_call *call = &event_bprint;
2130	struct ring_buffer_event *event;
2131	struct ring_buffer *buffer;
2132	struct trace_array *tr = &global_trace;
2133	struct bprint_entry *entry;
2134	unsigned long flags;
2135	char *tbuffer;
2136	int len = 0, size, pc;
2137
2138	if (unlikely(tracing_selftest_running || tracing_disabled))
2139		return 0;
2140
2141	/* Don't pollute graph traces with trace_vbprintk internals */
2142	pause_graph_tracing();
2143
2144	pc = preempt_count();
2145	preempt_disable_notrace();
2146
2147	tbuffer = get_trace_buf();
2148	if (!tbuffer) {
2149		len = 0;
2150		goto out;
2151	}
2152
2153	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2154
2155	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2156		goto out;
2157
2158	local_save_flags(flags);
2159	size = sizeof(*entry) + sizeof(u32) * len;
2160	buffer = tr->trace_buffer.buffer;
2161	event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2162					  flags, pc);
2163	if (!event)
2164		goto out;
2165	entry = ring_buffer_event_data(event);
2166	entry->ip			= ip;
2167	entry->fmt			= fmt;
2168
2169	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2170	if (!call_filter_check_discard(call, entry, buffer, event)) {
2171		__buffer_unlock_commit(buffer, event);
2172		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2173	}
2174
2175out:
2176	preempt_enable_notrace();
2177	unpause_graph_tracing();
2178
2179	return len;
2180}
2181EXPORT_SYMBOL_GPL(trace_vbprintk);
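
/*
 * For illustration (usage assumed, variables hypothetical):
 * trace_vbprintk() is normally reached via the trace_printk() macro
 * when the format is a compile-time constant, so only the format
 * pointer and the binary arguments are stored, e.g.:
 *
 *	trace_printk("read %d bytes from %s\n", len, name);
 *
 * The event is rendered back to text only when the "trace" file is
 * read.
 */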
2182
2183static int
2184__trace_array_vprintk(struct ring_buffer *buffer,
2185		      unsigned long ip, const char *fmt, va_list args)
2186{
2187	struct trace_event_call *call = &event_print;
2188	struct ring_buffer_event *event;
2189	int len = 0, size, pc;
2190	struct print_entry *entry;
2191	unsigned long flags;
2192	char *tbuffer;
2193
2194	if (tracing_disabled || tracing_selftest_running)
2195		return 0;
2196
2197	/* Don't pollute graph traces with trace_vprintk internals */
2198	pause_graph_tracing();
2199
2200	pc = preempt_count();
2201	preempt_disable_notrace();
2202
2204	tbuffer = get_trace_buf();
2205	if (!tbuffer) {
2206		len = 0;
2207		goto out;
2208	}
2209
2210	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2211
2212	local_save_flags(flags);
2213	size = sizeof(*entry) + len + 1;
2214	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2215					  flags, pc);
2216	if (!event)
2217		goto out;
2218	entry = ring_buffer_event_data(event);
2219	entry->ip = ip;
2220
2221	memcpy(&entry->buf, tbuffer, len + 1);
2222	if (!call_filter_check_discard(call, entry, buffer, event)) {
2223		__buffer_unlock_commit(buffer, event);
2224		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2225	}
2226 out:
2227	preempt_enable_notrace();
2228	unpause_graph_tracing();
2229
2230	return len;
2231}
2232
2233int trace_array_vprintk(struct trace_array *tr,
2234			unsigned long ip, const char *fmt, va_list args)
2235{
2236	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2237}
2238
2239int trace_array_printk(struct trace_array *tr,
2240		       unsigned long ip, const char *fmt, ...)
2241{
2242	int ret;
2243	va_list ap;
2244
2245	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2246		return 0;
2247
2248	va_start(ap, fmt);
2249	ret = trace_array_vprintk(tr, ip, fmt, ap);
2250	va_end(ap);
2251	return ret;
2252}
2253
2254int trace_array_printk_buf(struct ring_buffer *buffer,
2255			   unsigned long ip, const char *fmt, ...)
2256{
2257	int ret;
2258	va_list ap;
2259
2260	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2261		return 0;
2262
2263	va_start(ap, fmt);
2264	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2265	va_end(ap);
2266	return ret;
2267}
2268
2269int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2270{
2271	return trace_array_vprintk(&global_trace, ip, fmt, args);
2272}
2273EXPORT_SYMBOL_GPL(trace_vprintk);
2274
2275static void trace_iterator_increment(struct trace_iterator *iter)
2276{
2277	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2278
2279	iter->idx++;
2280	if (buf_iter)
2281		ring_buffer_read(buf_iter, NULL);
2282}
2283
2284static struct trace_entry *
2285peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2286		unsigned long *lost_events)
2287{
2288	struct ring_buffer_event *event;
2289	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2290
2291	if (buf_iter)
2292		event = ring_buffer_iter_peek(buf_iter, ts);
2293	else
2294		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2295					 lost_events);
2296
2297	if (event) {
2298		iter->ent_size = ring_buffer_event_length(event);
2299		return ring_buffer_event_data(event);
2300	}
2301	iter->ent_size = 0;
2302	return NULL;
2303}
2304
2305static struct trace_entry *
2306__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2307		  unsigned long *missing_events, u64 *ent_ts)
2308{
2309	struct ring_buffer *buffer = iter->trace_buffer->buffer;
2310	struct trace_entry *ent, *next = NULL;
2311	unsigned long lost_events = 0, next_lost = 0;
2312	int cpu_file = iter->cpu_file;
2313	u64 next_ts = 0, ts;
2314	int next_cpu = -1;
2315	int next_size = 0;
2316	int cpu;
2317
2318	/*
2319	 * If we are in a per_cpu trace file, don't bother iterating over
2320	 * all CPUs; just peek at that one cpu directly.
2321	 */
2322	if (cpu_file > RING_BUFFER_ALL_CPUS) {
2323		if (ring_buffer_empty_cpu(buffer, cpu_file))
2324			return NULL;
2325		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2326		if (ent_cpu)
2327			*ent_cpu = cpu_file;
2328
2329		return ent;
2330	}
2331
2332	for_each_tracing_cpu(cpu) {
2333
2334		if (ring_buffer_empty_cpu(buffer, cpu))
2335			continue;
2336
2337		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2338
2339		/*
2340		 * Pick the entry with the smallest timestamp:
2341		 */
2342		if (ent && (!next || ts < next_ts)) {
2343			next = ent;
2344			next_cpu = cpu;
2345			next_ts = ts;
2346			next_lost = lost_events;
2347			next_size = iter->ent_size;
2348		}
2349	}
2350
2351	iter->ent_size = next_size;
2352
2353	if (ent_cpu)
2354		*ent_cpu = next_cpu;
2355
2356	if (ent_ts)
2357		*ent_ts = next_ts;
2358
2359	if (missing_events)
2360		*missing_events = next_lost;
2361
2362	return next;
2363}
2364
2365/* Find the next real entry, without updating the iterator itself */
2366struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2367					  int *ent_cpu, u64 *ent_ts)
2368{
2369	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2370}
2371
2372/* Find the next real entry, and increment the iterator to the next entry */
2373void *trace_find_next_entry_inc(struct trace_iterator *iter)
2374{
2375	iter->ent = __find_next_entry(iter, &iter->cpu,
2376				      &iter->lost_events, &iter->ts);
2377
2378	if (iter->ent)
2379		trace_iterator_increment(iter);
2380
2381	return iter->ent ? iter : NULL;
2382}
2383
2384static void trace_consume(struct trace_iterator *iter)
2385{
2386	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2387			    &iter->lost_events);
2388}
2389
2390static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2391{
2392	struct trace_iterator *iter = m->private;
2393	int i = (int)*pos;
2394	void *ent;
2395
2396	WARN_ON_ONCE(iter->leftover);
2397
2398	(*pos)++;
2399
2400	/* can't go backwards */
2401	if (iter->idx > i)
2402		return NULL;
2403
2404	if (iter->idx < 0)
2405		ent = trace_find_next_entry_inc(iter);
2406	else
2407		ent = iter;
2408
2409	while (ent && iter->idx < i)
2410		ent = trace_find_next_entry_inc(iter);
2411
2412	iter->pos = *pos;
2413
2414	return ent;
2415}
2416
2417void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2418{
2419	struct ring_buffer_event *event;
2420	struct ring_buffer_iter *buf_iter;
2421	unsigned long entries = 0;
2422	u64 ts;
2423
2424	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2425
2426	buf_iter = trace_buffer_iter(iter, cpu);
2427	if (!buf_iter)
2428		return;
2429
2430	ring_buffer_iter_reset(buf_iter);
2431
2432	/*
2433	 * We could have the case with the max latency tracers
2434	 * that a reset never took place on a cpu. This is evidenced
2435	 * by the timestamp being before the start of the buffer.
2436	 */
2437	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2438		if (ts >= iter->trace_buffer->time_start)
2439			break;
2440		entries++;
2441		ring_buffer_read(buf_iter, NULL);
2442	}
2443
2444	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2445}
2446
2447/*
2448 * The current tracer is copied to avoid holding a global lock
2449 * all around.
2450 */
2451static void *s_start(struct seq_file *m, loff_t *pos)
2452{
2453	struct trace_iterator *iter = m->private;
2454	struct trace_array *tr = iter->tr;
2455	int cpu_file = iter->cpu_file;
2456	void *p = NULL;
2457	loff_t l = 0;
2458	int cpu;
2459
2460	/*
2461	 * Copy the tracer to avoid using a global lock all around.
2462	 * Since iter->trace is a copy of current_trace, the name pointer
2463	 * can be compared instead of using strcmp(), as iter->trace->name
2464	 * will point to the same string as current_trace->name.
2465	 */
2466	mutex_lock(&trace_types_lock);
2467	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2468		*iter->trace = *tr->current_trace;
2469	mutex_unlock(&trace_types_lock);
2470
2471#ifdef CONFIG_TRACER_MAX_TRACE
2472	if (iter->snapshot && iter->trace->use_max_tr)
2473		return ERR_PTR(-EBUSY);
2474#endif
2475
2476	if (!iter->snapshot)
2477		atomic_inc(&trace_record_cmdline_disabled);
2478
2479	if (*pos != iter->pos) {
2480		iter->ent = NULL;
2481		iter->cpu = 0;
2482		iter->idx = -1;
2483
2484		if (cpu_file == RING_BUFFER_ALL_CPUS) {
2485			for_each_tracing_cpu(cpu)
2486				tracing_iter_reset(iter, cpu);
2487		} else
2488			tracing_iter_reset(iter, cpu_file);
2489
2490		iter->leftover = 0;
2491		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2492			;
2493
2494	} else {
2495		/*
2496		 * If we overflowed the seq_file before, then we want
2497		 * to just reuse the trace_seq buffer again.
2498		 */
2499		if (iter->leftover)
2500			p = iter;
2501		else {
2502			l = *pos - 1;
2503			p = s_next(m, p, &l);
2504		}
2505	}
2506
2507	trace_event_read_lock();
2508	trace_access_lock(cpu_file);
2509	return p;
2510}
2511
2512static void s_stop(struct seq_file *m, void *p)
2513{
2514	struct trace_iterator *iter = m->private;
2515
2516#ifdef CONFIG_TRACER_MAX_TRACE
2517	if (iter->snapshot && iter->trace->use_max_tr)
2518		return;
2519#endif
2520
2521	if (!iter->snapshot)
2522		atomic_dec(&trace_record_cmdline_disabled);
2523
2524	trace_access_unlock(iter->cpu_file);
2525	trace_event_read_unlock();
2526}
2527
2528static void
2529get_total_entries(struct trace_buffer *buf,
2530		  unsigned long *total, unsigned long *entries)
2531{
2532	unsigned long count;
2533	int cpu;
2534
2535	*total = 0;
2536	*entries = 0;
2537
2538	for_each_tracing_cpu(cpu) {
2539		count = ring_buffer_entries_cpu(buf->buffer, cpu);
2540		/*
2541		 * If this buffer has skipped entries, then we hold all
2542		 * entries for the trace and we need to ignore the
2543		 * ones before the time stamp.
2544		 */
2545		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2546			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2547			/* total is the same as the entries */
2548			*total += count;
2549		} else
2550			*total += count +
2551				ring_buffer_overrun_cpu(buf->buffer, cpu);
2552		*entries += count;
2553	}
2554}
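
/*
 * Worked example (numbers are illustrative only): if a cpu currently
 * holds 800 events and 200 older events were overwritten (overrun),
 * that cpu adds 800 to *entries and 1000 to *total.  When
 * skipped_entries is set, the skipped events are subtracted from the
 * count before it is added to both sums.
 */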
2555
2556static void print_lat_help_header(struct seq_file *m)
2557{
2558	seq_puts(m, "#                  _------=> CPU#            \n"
2559		    "#                 / _-----=> irqs-off        \n"
2560		    "#                | / _----=> need-resched    \n"
2561		    "#                || / _---=> hardirq/softirq \n"
2562		    "#                ||| / _--=> preempt-depth   \n"
2563		    "#                |||| /     delay            \n"
2564		    "#  cmd     pid   ||||| time  |   caller      \n"
2565		    "#     \\   /      |||||  \\    |   /         \n");
2566}
2567
2568static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2569{
2570	unsigned long total;
2571	unsigned long entries;
2572
2573	get_total_entries(buf, &total, &entries);
2574	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2575		   entries, total, num_online_cpus());
2576	seq_puts(m, "#\n");
2577}
2578
2579static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2580{
2581	print_event_info(buf, m);
2582	seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
2583		    "#              | |       |          |         |\n");
2584}
2585
2586static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2587{
2588	print_event_info(buf, m);
2589	seq_puts(m, "#                              _-----=> irqs-off\n"
2590		    "#                             / _----=> need-resched\n"
2591		    "#                            | / _---=> hardirq/softirq\n"
2592		    "#                            || / _--=> preempt-depth\n"
2593		    "#                            ||| /     delay\n"
2594		    "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
2595		    "#              | |       |   ||||       |         |\n");
2596}
2597
2598void
2599print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2600{
2601	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
2602	struct trace_buffer *buf = iter->trace_buffer;
2603	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2604	struct tracer *type = iter->trace;
2605	unsigned long entries;
2606	unsigned long total;
2607	const char *name = type->name;
2610
2611	get_total_entries(buf, &total, &entries);
2612
2613	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2614		   name, UTS_RELEASE);
2615	seq_puts(m, "# -----------------------------------"
2616		 "---------------------------------\n");
2617	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2618		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2619		   nsecs_to_usecs(data->saved_latency),
2620		   entries,
2621		   total,
2622		   buf->cpu,
2623#if defined(CONFIG_PREEMPT_NONE)
2624		   "server",
2625#elif defined(CONFIG_PREEMPT_VOLUNTARY)
2626		   "desktop",
2627#elif defined(CONFIG_PREEMPT)
2628		   "preempt",
2629#else
2630		   "unknown",
2631#endif
2632		   /* These are reserved for later use */
2633		   0, 0, 0, 0);
2634#ifdef CONFIG_SMP
2635	seq_printf(m, " #P:%d)\n", num_online_cpus());
2636#else
2637	seq_puts(m, ")\n");
2638#endif
2639	seq_puts(m, "#    -----------------\n");
2640	seq_printf(m, "#    | task: %.16s-%d "
2641		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2642		   data->comm, data->pid,
2643		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2644		   data->policy, data->rt_priority);
2645	seq_puts(m, "#    -----------------\n");
2646
2647	if (data->critical_start) {
2648		seq_puts(m, "#  => started at: ");
2649		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2650		trace_print_seq(m, &iter->seq);
2651		seq_puts(m, "\n#  => ended at:   ");
2652		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2653		trace_print_seq(m, &iter->seq);
2654		seq_puts(m, "\n#\n");
2655	}
2656
2657	seq_puts(m, "#\n");
2658}
2659
2660static void test_cpu_buff_start(struct trace_iterator *iter)
2661{
2662	struct trace_seq *s = &iter->seq;
2663	struct trace_array *tr = iter->tr;
2664
2665	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
2666		return;
2667
2668	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2669		return;
2670
2671	if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
2672		return;
2673
2674	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2675		return;
2676
2677	if (iter->started)
2678		cpumask_set_cpu(iter->cpu, iter->started);
2679
2680	/* Don't print started cpu buffer for the first entry of the trace */
2681	if (iter->idx > 1)
2682		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2683				iter->cpu);
2684}
2685
2686static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2687{
2688	struct trace_array *tr = iter->tr;
2689	struct trace_seq *s = &iter->seq;
2690	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
2691	struct trace_entry *entry;
2692	struct trace_event *event;
2693
2694	entry = iter->ent;
2695
2696	test_cpu_buff_start(iter);
2697
2698	event = ftrace_find_event(entry->type);
2699
2700	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2701		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2702			trace_print_lat_context(iter);
2703		else
2704			trace_print_context(iter);
2705	}
2706
2707	if (trace_seq_has_overflowed(s))
2708		return TRACE_TYPE_PARTIAL_LINE;
2709
2710	if (event)
2711		return event->funcs->trace(iter, sym_flags, event);
2712
2713	trace_seq_printf(s, "Unknown type %d\n", entry->type);
2714
2715	return trace_handle_return(s);
2716}
2717
2718static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2719{
2720	struct trace_array *tr = iter->tr;
2721	struct trace_seq *s = &iter->seq;
2722	struct trace_entry *entry;
2723	struct trace_event *event;
2724
2725	entry = iter->ent;
2726
2727	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
2728		trace_seq_printf(s, "%d %d %llu ",
2729				 entry->pid, iter->cpu, iter->ts);
2730
2731	if (trace_seq_has_overflowed(s))
2732		return TRACE_TYPE_PARTIAL_LINE;
2733
2734	event = ftrace_find_event(entry->type);
2735	if (event)
2736		return event->funcs->raw(iter, 0, event);
2737
2738	trace_seq_printf(s, "%d ?\n", entry->type);
2739
2740	return trace_handle_return(s);
2741}
2742
2743static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2744{
2745	struct trace_array *tr = iter->tr;
2746	struct trace_seq *s = &iter->seq;
2747	unsigned char newline = '\n';
2748	struct trace_entry *entry;
2749	struct trace_event *event;
2750
2751	entry = iter->ent;
2752
2753	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2754		SEQ_PUT_HEX_FIELD(s, entry->pid);
2755		SEQ_PUT_HEX_FIELD(s, iter->cpu);
2756		SEQ_PUT_HEX_FIELD(s, iter->ts);
2757		if (trace_seq_has_overflowed(s))
2758			return TRACE_TYPE_PARTIAL_LINE;
2759	}
2760
2761	event = ftrace_find_event(entry->type);
2762	if (event) {
2763		enum print_line_t ret = event->funcs->hex(iter, 0, event);
2764		if (ret != TRACE_TYPE_HANDLED)
2765			return ret;
2766	}
2767
2768	SEQ_PUT_FIELD(s, newline);
2769
2770	return trace_handle_return(s);
2771}
2772
2773static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2774{
2775	struct trace_array *tr = iter->tr;
2776	struct trace_seq *s = &iter->seq;
2777	struct trace_entry *entry;
2778	struct trace_event *event;
2779
2780	entry = iter->ent;
2781
2782	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2783		SEQ_PUT_FIELD(s, entry->pid);
2784		SEQ_PUT_FIELD(s, iter->cpu);
2785		SEQ_PUT_FIELD(s, iter->ts);
2786		if (trace_seq_has_overflowed(s))
2787			return TRACE_TYPE_PARTIAL_LINE;
2788	}
2789
2790	event = ftrace_find_event(entry->type);
2791	return event ? event->funcs->binary(iter, 0, event) :
2792		TRACE_TYPE_HANDLED;
2793}
2794
2795int trace_empty(struct trace_iterator *iter)
2796{
2797	struct ring_buffer_iter *buf_iter;
2798	int cpu;
2799
2800	/* If we are looking at one CPU buffer, only check that one */
2801	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2802		cpu = iter->cpu_file;
2803		buf_iter = trace_buffer_iter(iter, cpu);
2804		if (buf_iter) {
2805			if (!ring_buffer_iter_empty(buf_iter))
2806				return 0;
2807		} else {
2808			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2809				return 0;
2810		}
2811		return 1;
2812	}
2813
2814	for_each_tracing_cpu(cpu) {
2815		buf_iter = trace_buffer_iter(iter, cpu);
2816		if (buf_iter) {
2817			if (!ring_buffer_iter_empty(buf_iter))
2818				return 0;
2819		} else {
2820			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2821				return 0;
2822		}
2823	}
2824
2825	return 1;
2826}
2827
2828/*  Called with trace_event_read_lock() held. */
2829enum print_line_t print_trace_line(struct trace_iterator *iter)
2830{
2831	struct trace_array *tr = iter->tr;
2832	unsigned long trace_flags = tr->trace_flags;
2833	enum print_line_t ret;
2834
2835	if (iter->lost_events) {
2836		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2837				 iter->cpu, iter->lost_events);
2838		if (trace_seq_has_overflowed(&iter->seq))
2839			return TRACE_TYPE_PARTIAL_LINE;
2840	}
2841
2842	if (iter->trace && iter->trace->print_line) {
2843		ret = iter->trace->print_line(iter);
2844		if (ret != TRACE_TYPE_UNHANDLED)
2845			return ret;
2846	}
2847
2848	if (iter->ent->type == TRACE_BPUTS &&
2849			trace_flags & TRACE_ITER_PRINTK &&
2850			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2851		return trace_print_bputs_msg_only(iter);
2852
2853	if (iter->ent->type == TRACE_BPRINT &&
2854			trace_flags & TRACE_ITER_PRINTK &&
2855			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2856		return trace_print_bprintk_msg_only(iter);
2857
2858	if (iter->ent->type == TRACE_PRINT &&
2859			trace_flags & TRACE_ITER_PRINTK &&
2860			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2861		return trace_print_printk_msg_only(iter);
2862
2863	if (trace_flags & TRACE_ITER_BIN)
2864		return print_bin_fmt(iter);
2865
2866	if (trace_flags & TRACE_ITER_HEX)
2867		return print_hex_fmt(iter);
2868
2869	if (trace_flags & TRACE_ITER_RAW)
2870		return print_raw_fmt(iter);
2871
2872	return print_trace_fmt(iter);
2873}
2874
2875void trace_latency_header(struct seq_file *m)
2876{
2877	struct trace_iterator *iter = m->private;
2878	struct trace_array *tr = iter->tr;
2879
2880	/* print nothing if the buffers are empty */
2881	if (trace_empty(iter))
2882		return;
2883
2884	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2885		print_trace_header(m, iter);
2886
2887	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
2888		print_lat_help_header(m);
2889}
2890
2891void trace_default_header(struct seq_file *m)
2892{
2893	struct trace_iterator *iter = m->private;
2894	struct trace_array *tr = iter->tr;
2895	unsigned long trace_flags = tr->trace_flags;
2896
2897	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2898		return;
2899
2900	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2901		/* print nothing if the buffers are empty */
2902		if (trace_empty(iter))
2903			return;
2904		print_trace_header(m, iter);
2905		if (!(trace_flags & TRACE_ITER_VERBOSE))
2906			print_lat_help_header(m);
2907	} else {
2908		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2909			if (trace_flags & TRACE_ITER_IRQ_INFO)
2910				print_func_help_header_irq(iter->trace_buffer, m);
2911			else
2912				print_func_help_header(iter->trace_buffer, m);
2913		}
2914	}
2915}
2916
2917static void test_ftrace_alive(struct seq_file *m)
2918{
2919	if (!ftrace_is_dead())
2920		return;
2921	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
2922		    "#          MAY BE MISSING FUNCTION EVENTS\n");
2923}
2924
2925#ifdef CONFIG_TRACER_MAX_TRACE
2926static void show_snapshot_main_help(struct seq_file *m)
2927{
2928	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
2929		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2930		    "#                      Takes a snapshot of the main buffer.\n"
2931		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
2932		    "#                      (Doesn't have to be '2'; works with any number that\n"
2933		    "#                       is not a '0' or '1')\n");
2934}
2935
2936static void show_snapshot_percpu_help(struct seq_file *m)
2937{
2938	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2939#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2940	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
2941		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
2942#else
2943	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
2944		    "#                     Must use main snapshot file to allocate.\n");
2945#endif
2946	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
2947		    "#                      (Doesn't have to be '2'; works with any number that\n"
2948		    "#                       is not a '0' or '1')\n");
2949}
2950
2951static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2952{
2953	if (iter->tr->allocated_snapshot)
2954		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
2955	else
2956		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
2957
2958	seq_puts(m, "# Snapshot commands:\n");
2959	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2960		show_snapshot_main_help(m);
2961	else
2962		show_snapshot_percpu_help(m);
2963}
2964#else
2965/* Should never be called */
2966static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2967#endif
2968
2969static int s_show(struct seq_file *m, void *v)
2970{
2971	struct trace_iterator *iter = v;
2972	int ret;
2973
2974	if (iter->ent == NULL) {
2975		if (iter->tr) {
2976			seq_printf(m, "# tracer: %s\n", iter->trace->name);
2977			seq_puts(m, "#\n");
2978			test_ftrace_alive(m);
2979		}
2980		if (iter->snapshot && trace_empty(iter))
2981			print_snapshot_help(m, iter);
2982		else if (iter->trace && iter->trace->print_header)
2983			iter->trace->print_header(m);
2984		else
2985			trace_default_header(m);
2986
2987	} else if (iter->leftover) {
2988		/*
2989		 * If we filled the seq_file buffer earlier, we
2990		 * want to just show it now.
2991		 */
2992		ret = trace_print_seq(m, &iter->seq);
2993
2994		/* ret should this time be zero, but you never know */
2995		iter->leftover = ret;
2996
2997	} else {
2998		print_trace_line(iter);
2999		ret = trace_print_seq(m, &iter->seq);
3000		/*
3001		 * If we overflow the seq_file buffer, then it will
3002		 * ask us for this data again at start up.
3003		 * Use that instead.
3004		 *  ret is 0 if seq_file write succeeded.
3005		 *        -1 otherwise.
3006		 */
3007		iter->leftover = ret;
3008	}
3009
3010	return 0;
3011}
3012
3013/*
3014 * Should be used after trace_array_get(), trace_types_lock
3015 * ensures that i_cdev was already initialized.
3016 */
3017static inline int tracing_get_cpu(struct inode *inode)
3018{
3019	if (inode->i_cdev) /* See trace_create_cpu_file() */
3020		return (long)inode->i_cdev - 1;
3021	return RING_BUFFER_ALL_CPUS;
3022}
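
/*
 * Assumed encoding (based on the "- 1" above): the per-cpu trace
 * files stash (cpu + 1) in i_cdev when they are created, so a NULL
 * i_cdev (the top-level files) means RING_BUFFER_ALL_CPUS and, e.g.,
 * the inode behind per_cpu/cpu2/trace decodes back to cpu 2 here.
 */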
3023
3024static const struct seq_operations tracer_seq_ops = {
3025	.start		= s_start,
3026	.next		= s_next,
3027	.stop		= s_stop,
3028	.show		= s_show,
3029};
3030
3031static struct trace_iterator *
3032__tracing_open(struct inode *inode, struct file *file, bool snapshot)
3033{
3034	struct trace_array *tr = inode->i_private;
3035	struct trace_iterator *iter;
3036	int cpu;
3037
3038	if (tracing_disabled)
3039		return ERR_PTR(-ENODEV);
3040
3041	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3042	if (!iter)
3043		return ERR_PTR(-ENOMEM);
3044
3045	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3046				    GFP_KERNEL);
3047	if (!iter->buffer_iter)
3048		goto release;
3049
3050	/*
3051	 * We make a copy of the current tracer to avoid concurrent
3052	 * changes on it while we are reading.
3053	 */
3054	mutex_lock(&trace_types_lock);
3055	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3056	if (!iter->trace)
3057		goto fail;
3058
3059	*iter->trace = *tr->current_trace;
3060
3061	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3062		goto fail;
3063
3064	iter->tr = tr;
3065
3066#ifdef CONFIG_TRACER_MAX_TRACE
3067	/* Currently only the top directory has a snapshot */
3068	if (tr->current_trace->print_max || snapshot)
3069		iter->trace_buffer = &tr->max_buffer;
3070	else
3071#endif
3072		iter->trace_buffer = &tr->trace_buffer;
3073	iter->snapshot = snapshot;
3074	iter->pos = -1;
3075	iter->cpu_file = tracing_get_cpu(inode);
3076	mutex_init(&iter->mutex);
3077
3078	/* Notify the tracer early; before we stop tracing. */
3079	if (iter->trace && iter->trace->open)
3080		iter->trace->open(iter);
3081
3082	/* Annotate start of buffers if we had overruns */
3083	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3084		iter->iter_flags |= TRACE_FILE_ANNOTATE;
3085
3086	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3087	if (trace_clocks[tr->clock_id].in_ns)
3088		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3089
3090	/* stop the trace while dumping if we are not opening "snapshot" */
3091	if (!iter->snapshot)
3092		tracing_stop_tr(tr);
3093
3094	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3095		for_each_tracing_cpu(cpu) {
3096			iter->buffer_iter[cpu] =
3097				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3098		}
3099		ring_buffer_read_prepare_sync();
3100		for_each_tracing_cpu(cpu) {
3101			ring_buffer_read_start(iter->buffer_iter[cpu]);
3102			tracing_iter_reset(iter, cpu);
3103		}
3104	} else {
3105		cpu = iter->cpu_file;
3106		iter->buffer_iter[cpu] =
3107			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3108		ring_buffer_read_prepare_sync();
3109		ring_buffer_read_start(iter->buffer_iter[cpu]);
3110		tracing_iter_reset(iter, cpu);
3111	}
3112
3113	mutex_unlock(&trace_types_lock);
3114
3115	return iter;
3116
3117 fail:
3118	mutex_unlock(&trace_types_lock);
3119	kfree(iter->trace);
3120	kfree(iter->buffer_iter);
3121release:
3122	seq_release_private(inode, file);
3123	return ERR_PTR(-ENOMEM);
3124}
3125
3126int tracing_open_generic(struct inode *inode, struct file *filp)
3127{
3128	if (tracing_disabled)
3129		return -ENODEV;
3130
3131	filp->private_data = inode->i_private;
3132	return 0;
3133}
3134
3135bool tracing_is_disabled(void)
3136{
3137	return tracing_disabled ? true : false;
3138}
3139
3140/*
3141 * Open and update trace_array ref count.
3142 * Must have the current trace_array passed to it.
3143 */
3144static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3145{
3146	struct trace_array *tr = inode->i_private;
3147
3148	if (tracing_disabled)
3149		return -ENODEV;
3150
3151	if (trace_array_get(tr) < 0)
3152		return -ENODEV;
3153
3154	filp->private_data = inode->i_private;
3155
3156	return 0;
3157}
3158
3159static int tracing_release(struct inode *inode, struct file *file)
3160{
3161	struct trace_array *tr = inode->i_private;
3162	struct seq_file *m = file->private_data;
3163	struct trace_iterator *iter;
3164	int cpu;
3165
3166	if (!(file->f_mode & FMODE_READ)) {
3167		trace_array_put(tr);
3168		return 0;
3169	}
3170
3171	/* Writes do not use seq_file */
3172	iter = m->private;
3173	mutex_lock(&trace_types_lock);
3174
3175	for_each_tracing_cpu(cpu) {
3176		if (iter->buffer_iter[cpu])
3177			ring_buffer_read_finish(iter->buffer_iter[cpu]);
3178	}
3179
3180	if (iter->trace && iter->trace->close)
3181		iter->trace->close(iter);
3182
3183	if (!iter->snapshot)
3184		/* reenable tracing if it was previously enabled */
3185		tracing_start_tr(tr);
3186
3187	__trace_array_put(tr);
3188
3189	mutex_unlock(&trace_types_lock);
3190
3191	mutex_destroy(&iter->mutex);
3192	free_cpumask_var(iter->started);
3193	kfree(iter->trace);
3194	kfree(iter->buffer_iter);
3195	seq_release_private(inode, file);
3196
3197	return 0;
3198}
3199
3200static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3201{
3202	struct trace_array *tr = inode->i_private;
3203
3204	trace_array_put(tr);
3205	return 0;
3206}
3207
3208static int tracing_single_release_tr(struct inode *inode, struct file *file)
3209{
3210	struct trace_array *tr = inode->i_private;
3211
3212	trace_array_put(tr);
3213
3214	return single_release(inode, file);
3215}
3216
3217static int tracing_open(struct inode *inode, struct file *file)
3218{
3219	struct trace_array *tr = inode->i_private;
3220	struct trace_iterator *iter;
3221	int ret = 0;
3222
3223	if (trace_array_get(tr) < 0)
3224		return -ENODEV;
3225
3226	/* If this file was open for write, then erase contents */
3227	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3228		int cpu = tracing_get_cpu(inode);
3229
3230		if (cpu == RING_BUFFER_ALL_CPUS)
3231			tracing_reset_online_cpus(&tr->trace_buffer);
3232		else
3233			tracing_reset(&tr->trace_buffer, cpu);
3234	}
3235
3236	if (file->f_mode & FMODE_READ) {
3237		iter = __tracing_open(inode, file, false);
3238		if (IS_ERR(iter))
3239			ret = PTR_ERR(iter);
3240		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3241			iter->iter_flags |= TRACE_FILE_LAT_FMT;
3242	}
3243
3244	if (ret < 0)
3245		trace_array_put(tr);
3246
3247	return ret;
3248}
3249
3250/*
3251 * Some tracers are not suitable for instance buffers.
3252 * A tracer is always available for the global array (toplevel)
3253 * or if it explicitly states that it is.
3254 */
3255static bool
3256trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3257{
3258	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3259}
3260
3261/* Find the next tracer that this trace array may use */
3262static struct tracer *
3263get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3264{
3265	while (t && !trace_ok_for_array(t, tr))
3266		t = t->next;
3267
3268	return t;
3269}
3270
3271static void *
3272t_next(struct seq_file *m, void *v, loff_t *pos)
3273{
3274	struct trace_array *tr = m->private;
3275	struct tracer *t = v;
3276
3277	(*pos)++;
3278
3279	if (t)
3280		t = get_tracer_for_array(tr, t->next);
3281
3282	return t;
3283}
3284
3285static void *t_start(struct seq_file *m, loff_t *pos)
3286{
3287	struct trace_array *tr = m->private;
3288	struct tracer *t;
3289	loff_t l = 0;
3290
3291	mutex_lock(&trace_types_lock);
3292
3293	t = get_tracer_for_array(tr, trace_types);
3294	for (; t && l < *pos; t = t_next(m, t, &l))
3295			;
3296
3297	return t;
3298}
3299
3300static void t_stop(struct seq_file *m, void *p)
3301{
3302	mutex_unlock(&trace_types_lock);
3303}
3304
3305static int t_show(struct seq_file *m, void *v)
3306{
3307	struct tracer *t = v;
3308
3309	if (!t)
3310		return 0;
3311
3312	seq_puts(m, t->name);
3313	if (t->next)
3314		seq_putc(m, ' ');
3315	else
3316		seq_putc(m, '\n');
3317
3318	return 0;
3319}
3320
3321static const struct seq_operations show_traces_seq_ops = {
3322	.start		= t_start,
3323	.next		= t_next,
3324	.stop		= t_stop,
3325	.show		= t_show,
3326};
3327
3328static int show_traces_open(struct inode *inode, struct file *file)
3329{
3330	struct trace_array *tr = inode->i_private;
3331	struct seq_file *m;
3332	int ret;
3333
3334	if (tracing_disabled)
3335		return -ENODEV;
3336
3337	ret = seq_open(file, &show_traces_seq_ops);
3338	if (ret)
3339		return ret;
3340
3341	m = file->private_data;
3342	m->private = tr;
3343
3344	return 0;
3345}
3346
3347static ssize_t
3348tracing_write_stub(struct file *filp, const char __user *ubuf,
3349		   size_t count, loff_t *ppos)
3350{
3351	return count;
3352}
3353
3354loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3355{
3356	int ret;
3357
3358	if (file->f_mode & FMODE_READ)
3359		ret = seq_lseek(file, offset, whence);
3360	else
3361		file->f_pos = ret = 0;
3362
3363	return ret;
3364}
3365
3366static const struct file_operations tracing_fops = {
3367	.open		= tracing_open,
3368	.read		= seq_read,
3369	.write		= tracing_write_stub,
3370	.llseek		= tracing_lseek,
3371	.release	= tracing_release,
3372};
3373
3374static const struct file_operations show_traces_fops = {
3375	.open		= show_traces_open,
3376	.read		= seq_read,
3377	.release	= seq_release,
3378	.llseek		= seq_lseek,
3379};
3380
3381/*
3382 * The tracer itself will not take this lock, but still we want
3383 * to provide a consistent cpumask to user-space:
3384 */
3385static DEFINE_MUTEX(tracing_cpumask_update_lock);
3386
3387/*
3388 * Temporary storage for the character representation of the
3389 * CPU bitmask (and one more byte for the newline):
3390 */
3391static char mask_str[NR_CPUS + 1];
3392
3393static ssize_t
3394tracing_cpumask_read(struct file *filp, char __user *ubuf,
3395		     size_t count, loff_t *ppos)
3396{
3397	struct trace_array *tr = file_inode(filp)->i_private;
3398	int len;
3399
3400	mutex_lock(&tracing_cpumask_update_lock);
3401
3402	len = snprintf(mask_str, count, "%*pb\n",
3403		       cpumask_pr_args(tr->tracing_cpumask));
3404	if (len >= count) {
3405		count = -EINVAL;
3406		goto out_err;
3407	}
3408	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3409
3410out_err:
3411	mutex_unlock(&tracing_cpumask_update_lock);
3412
3413	return count;
3414}
3415
3416static ssize_t
3417tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3418		      size_t count, loff_t *ppos)
3419{
3420	struct trace_array *tr = file_inode(filp)->i_private;
3421	cpumask_var_t tracing_cpumask_new;
3422	int err, cpu;
3423
3424	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3425		return -ENOMEM;
3426
3427	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3428	if (err)
3429		goto err_unlock;
3430
3431	mutex_lock(&tracing_cpumask_update_lock);
3432
3433	local_irq_disable();
3434	arch_spin_lock(&tr->max_lock);
3435	for_each_tracing_cpu(cpu) {
3436		/*
3437		 * Increase/decrease the disabled counter if we are
3438		 * about to flip a bit in the cpumask:
3439		 */
3440		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3441				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3442			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3443			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3444		}
3445		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3446				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3447			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3448			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3449		}
3450	}
3451	arch_spin_unlock(&tr->max_lock);
3452	local_irq_enable();
3453
3454	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3455
3456	mutex_unlock(&tracing_cpumask_update_lock);
3457	free_cpumask_var(tracing_cpumask_new);
3458
3459	return count;
3460
3461err_unlock:
3462	free_cpumask_var(tracing_cpumask_new);
3463
3464	return err;
3465}
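
/*
 * Usage sketch (illustrative shell command, not from this file): the
 * mask is written in hex and parsed by cpumask_parse_user() above,
 * e.g.
 *
 *	# echo 0f > tracing_cpumask
 *
 * restricts tracing to CPUs 0-3 and disables per-cpu recording on
 * every cpu whose bit was cleared.
 */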
3466
3467static const struct file_operations tracing_cpumask_fops = {
3468	.open		= tracing_open_generic_tr,
3469	.read		= tracing_cpumask_read,
3470	.write		= tracing_cpumask_write,
3471	.release	= tracing_release_generic_tr,
3472	.llseek		= generic_file_llseek,
3473};
3474
3475static int tracing_trace_options_show(struct seq_file *m, void *v)
3476{
3477	struct tracer_opt *trace_opts;
3478	struct trace_array *tr = m->private;
3479	u32 tracer_flags;
3480	int i;
3481
3482	mutex_lock(&trace_types_lock);
3483	tracer_flags = tr->current_trace->flags->val;
3484	trace_opts = tr->current_trace->flags->opts;
3485
3486	for (i = 0; trace_options[i]; i++) {
3487		if (tr->trace_flags & (1 << i))
3488			seq_printf(m, "%s\n", trace_options[i]);
3489		else
3490			seq_printf(m, "no%s\n", trace_options[i]);
3491	}
3492
3493	for (i = 0; trace_opts[i].name; i++) {
3494		if (tracer_flags & trace_opts[i].bit)
3495			seq_printf(m, "%s\n", trace_opts[i].name);
3496		else
3497			seq_printf(m, "no%s\n", trace_opts[i].name);
3498	}
3499	mutex_unlock(&trace_types_lock);
3500
3501	return 0;
3502}
3503
3504static int __set_tracer_option(struct trace_array *tr,
3505			       struct tracer_flags *tracer_flags,
3506			       struct tracer_opt *opts, int neg)
3507{
3508	struct tracer *trace = tr->current_trace;
3509	int ret;
3510
3511	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3512	if (ret)
3513		return ret;
3514
3515	if (neg)
3516		tracer_flags->val &= ~opts->bit;
3517	else
3518		tracer_flags->val |= opts->bit;
3519	return 0;
3520}
3521
3522/* Try to assign a tracer specific option */
3523static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3524{
3525	struct tracer *trace = tr->current_trace;
3526	struct tracer_flags *tracer_flags = trace->flags;
3527	struct tracer_opt *opts = NULL;
3528	int i;
3529
3530	for (i = 0; tracer_flags->opts[i].name; i++) {
3531		opts = &tracer_flags->opts[i];
3532
3533		if (strcmp(cmp, opts->name) == 0)
3534			return __set_tracer_option(tr, trace->flags, opts, neg);
3535	}
3536
3537	return -EINVAL;
3538}
3539
3540/* Some tracers require overwrite to stay enabled */
3541int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3542{
3543	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3544		return -1;
3545
3546	return 0;
3547}
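
/*
 * Assumption about the users of this helper: latency tracers that
 * must keep the oldest events (such as irqsoff or wakeup) are
 * expected to call it from their ->flag_changed hook, so that
 * clearing the "overwrite" option while they run is rejected with
 * -EINVAL by set_tracer_flag() below.
 */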
3548
3549int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3550{
3551	/* do nothing if flag is already set */
3552	if (!!(tr->trace_flags & mask) == !!enabled)
3553		return 0;
3554
3555	/* Give the tracer a chance to approve the change */
3556	if (tr->current_trace->flag_changed)
3557		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3558			return -EINVAL;
3559
3560	if (enabled)
3561		tr->trace_flags |= mask;
3562	else
3563		tr->trace_flags &= ~mask;
3564
3565	if (mask == TRACE_ITER_RECORD_CMD)
3566		trace_event_enable_cmd_record(enabled);
3567
3568	if (mask == TRACE_ITER_OVERWRITE) {
3569		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3570#ifdef CONFIG_TRACER_MAX_TRACE
3571		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3572#endif
3573	}
3574
3575	if (mask == TRACE_ITER_PRINTK) {
3576		trace_printk_start_stop_comm(enabled);
3577		trace_printk_control(enabled);
3578	}
3579
3580	return 0;
3581}
3582
3583static int trace_set_options(struct trace_array *tr, char *option)
3584{
3585	char *cmp;
3586	int neg = 0;
3587	int ret = -ENODEV;
3588	int i;
3589	size_t orig_len = strlen(option);
3590
3591	cmp = strstrip(option);
3592
3593	if (strncmp(cmp, "no", 2) == 0) {
3594		neg = 1;
3595		cmp += 2;
3596	}
3597
3598	mutex_lock(&trace_types_lock);
3599
3600	for (i = 0; trace_options[i]; i++) {
3601		if (strcmp(cmp, trace_options[i]) == 0) {
3602			ret = set_tracer_flag(tr, 1 << i, !neg);
3603			break;
3604		}
3605	}
3606
3607	/* If no option could be set, test the specific tracer options */
3608	if (!trace_options[i])
3609		ret = set_tracer_option(tr, cmp, neg);
3610
3611	mutex_unlock(&trace_types_lock);
3612
3613	/*
3614	 * If the first trailing whitespace is replaced with '\0' by strstrip,
3615	 * turn it back into a space.
3616	 */
3617	if (orig_len > strlen(option))
3618		option[strlen(option)] = ' ';
3619
3620	return ret;
3621}
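
/*
 * Illustrative example (option name assumed to exist in
 * trace_options[]): writing "overwrite" ends up as
 * set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1), while "nooverwrite"
 * strips the "no" prefix and clears the same flag.  Names that match
 * nothing fall through to the tracer-specific options.
 */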
3622
3623static void __init apply_trace_boot_options(void)
3624{
3625	char *buf = trace_boot_options_buf;
3626	char *option;
3627
3628	while (true) {
3629		option = strsep(&buf, ",");
3630
3631		if (!option)
3632			break;
3633
3634		if (*option)
3635			trace_set_options(&global_trace, option);
3636
3637		/* Put back the comma to allow this to be called again */
3638		if (buf)
3639			*(buf - 1) = ',';
3640	}
3641}
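
/*
 * Hedged example: trace_boot_options_buf is expected to be filled
 * from the kernel command line, so booting with, e.g.,
 *
 *	trace_options=nooverwrite,sym-offset
 *
 * applies each comma-separated token through trace_set_options()
 * before user space is up.  The option names above are illustrative.
 */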
3642
3643static ssize_t
3644tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3645			size_t cnt, loff_t *ppos)
3646{
3647	struct seq_file *m = filp->private_data;
3648	struct trace_array *tr = m->private;
3649	char buf[64];
3650	int ret;
3651
3652	if (cnt >= sizeof(buf))
3653		return -EINVAL;
3654
3655	if (copy_from_user(&buf, ubuf, cnt))
3656		return -EFAULT;
3657
3658	buf[cnt] = 0;
3659
3660	ret = trace_set_options(tr, buf);
3661	if (ret < 0)
3662		return ret;
3663
3664	*ppos += cnt;
3665
3666	return cnt;
3667}
3668
3669static int tracing_trace_options_open(struct inode *inode, struct file *file)
3670{
3671	struct trace_array *tr = inode->i_private;
3672	int ret;
3673
3674	if (tracing_disabled)
3675		return -ENODEV;
3676
3677	if (trace_array_get(tr) < 0)
3678		return -ENODEV;
3679
3680	ret = single_open(file, tracing_trace_options_show, inode->i_private);
3681	if (ret < 0)
3682		trace_array_put(tr);
3683
3684	return ret;
3685}
3686
3687static const struct file_operations tracing_iter_fops = {
3688	.open		= tracing_trace_options_open,
3689	.read		= seq_read,
3690	.llseek		= seq_lseek,
3691	.release	= tracing_single_release_tr,
3692	.write		= tracing_trace_options_write,
3693};
3694
3695static const char readme_msg[] =
3696	"tracing mini-HOWTO:\n\n"
3697	"# echo 0 > tracing_on : quick way to disable tracing\n"
3698	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3699	" Important files:\n"
3700	"  trace\t\t\t- The static contents of the buffer\n"
3701	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
3702	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3703	"  current_tracer\t- function and latency tracers\n"
3704	"  available_tracers\t- list of configured tracers for current_tracer\n"
3705	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3706	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3707	"  trace_clock\t\t- change the clock used to order events\n"
3708	"       local:   Per cpu clock but may not be synced across CPUs\n"
3709	"      global:   Synced across CPUs but slows tracing down.\n"
3710	"     counter:   Not a clock, but just an increment\n"
3711	"      uptime:   Jiffy counter from time of boot\n"
3712	"        perf:   Same clock that perf events use\n"
3713#ifdef CONFIG_X86_64
3714	"     x86-tsc:   TSC cycle counter\n"
3715#endif
3716	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
3717	"  tracing_cpumask\t- Limit which CPUs to trace\n"
3718	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3719	"\t\t\t  Remove sub-buffer with rmdir\n"
3720	"  trace_options\t\t- Set format or modify how tracing happens\n"
3721	"\t\t\t  Disable an option by prefixing 'no' to the\n"
3722	"\t\t\t  option name\n"
3723	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
3724#ifdef CONFIG_DYNAMIC_FTRACE
3725	"\n  available_filter_functions - list of functions that can be filtered on\n"
3726	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
3727	"\t\t\t  functions\n"
3728	"\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3729	"\t     modules: Can select a group via module\n"
3730	"\t      Format: :mod:<module-name>\n"
3731	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3732	"\t    triggers: a command to perform when function is hit\n"
3733	"\t      Format: <function>:<trigger>[:count]\n"
3734	"\t     trigger: traceon, traceoff\n"
3735	"\t\t      enable_event:<system>:<event>\n"
3736	"\t\t      disable_event:<system>:<event>\n"
3737#ifdef CONFIG_STACKTRACE
3738	"\t\t      stacktrace\n"
3739#endif
3740#ifdef CONFIG_TRACER_SNAPSHOT
3741	"\t\t      snapshot\n"
3742#endif
3743	"\t\t      dump\n"
3744	"\t\t      cpudump\n"
3745	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3746	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3747	"\t     The first one will disable tracing every time do_fault is hit\n"
3748	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3749	"\t       The first time do_trap is hit and it disables tracing, the\n"
3750	"\t       counter will decrement to 2. If tracing is already disabled,\n"
3751	"\t       the counter will not decrement. It only decrements when the\n"
3752	"\t       trigger did work\n"
3753	"\t     To remove trigger without count:\n"
3754	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3755	"\t     To remove trigger with a count:\n"
3756	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3757	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3758	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3759	"\t    modules: Can select a group via module command :mod:\n"
3760	"\t    Does not accept triggers\n"
3761#endif /* CONFIG_DYNAMIC_FTRACE */
3762#ifdef CONFIG_FUNCTION_TRACER
3763	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3764	"\t\t    (function)\n"
3765#endif
3766#ifdef CONFIG_FUNCTION_GRAPH_TRACER
3767	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3768	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
3769	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3770#endif
3771#ifdef CONFIG_TRACER_SNAPSHOT
3772	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3773	"\t\t\t  snapshot buffer. Read the contents for more\n"
3774	"\t\t\t  information\n"
3775#endif
3776#ifdef CONFIG_STACK_TRACER
3777	"  stack_trace\t\t- Shows the max stack trace when active\n"
3778	"  stack_max_size\t- Shows current max stack size that was traced\n"
3779	"\t\t\t  Write into this file to reset the max size (trigger a\n"
3780	"\t\t\t  new trace)\n"
3781#ifdef CONFIG_DYNAMIC_FTRACE
3782	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3783	"\t\t\t  traces\n"
3784#endif
3785#endif /* CONFIG_STACK_TRACER */
3786	"  events/\t\t- Directory containing all trace event subsystems:\n"
3787	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3788	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
3789	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3790	"\t\t\t  events\n"
3791	"      filter\t\t- If set, only events passing filter are traced\n"
3792	"  events/<system>/<event>/\t- Directory containing control files for\n"
3793	"\t\t\t  <event>:\n"
3794	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3795	"      filter\t\t- If set, only events passing filter are traced\n"
3796	"      trigger\t\t- If set, a command to perform when event is hit\n"
3797	"\t    Format: <trigger>[:count][if <filter>]\n"
3798	"\t   trigger: traceon, traceoff\n"
3799	"\t            enable_event:<system>:<event>\n"
3800	"\t            disable_event:<system>:<event>\n"
3801#ifdef CONFIG_STACKTRACE
3802	"\t\t    stacktrace\n"
3803#endif
3804#ifdef CONFIG_TRACER_SNAPSHOT
3805	"\t\t    snapshot\n"
3806#endif
3807	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3808	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3809	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3810	"\t                  events/block/block_unplug/trigger\n"
3811	"\t   The first disables tracing every time block_unplug is hit.\n"
3812	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3813	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3814	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
3815	"\t   Like function triggers, the counter is only decremented if it\n"
3816	"\t    enabled or disabled tracing.\n"
3817	"\t   To remove a trigger without a count:\n"
3818	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3819	"\t   To remove a trigger with a count:\n"
3820	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3821	"\t   Filters can be ignored when removing a trigger.\n"
3822;
3823
3824static ssize_t
3825tracing_readme_read(struct file *filp, char __user *ubuf,
3826		       size_t cnt, loff_t *ppos)
3827{
3828	return simple_read_from_buffer(ubuf, cnt, ppos,
3829					readme_msg, strlen(readme_msg));
3830}
3831
3832static const struct file_operations tracing_readme_fops = {
3833	.open		= tracing_open_generic,
3834	.read		= tracing_readme_read,
3835	.llseek		= generic_file_llseek,
3836};
3837
3838static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
3839{
3840	unsigned int *ptr = v;
3841
3842	if (*pos || m->count)
3843		ptr++;
3844
3845	(*pos)++;
3846
3847	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
3848	     ptr++) {
3849		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
3850			continue;
3851
3852		return ptr;
3853	}
3854
3855	return NULL;
3856}
3857
3858static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
3859{
3860	void *v;
3861	loff_t l = 0;
3862
3863	preempt_disable();
3864	arch_spin_lock(&trace_cmdline_lock);
3865
3866	v = &savedcmd->map_cmdline_to_pid[0];
3867	while (l <= *pos) {
3868		v = saved_cmdlines_next(m, v, &l);
3869		if (!v)
3870			return NULL;
3871	}
3872
3873	return v;
3874}
3875
3876static void saved_cmdlines_stop(struct seq_file *m, void *v)
3877{
3878	arch_spin_unlock(&trace_cmdline_lock);
3879	preempt_enable();
3880}
3881
3882static int saved_cmdlines_show(struct seq_file *m, void *v)
3883{
3884	char buf[TASK_COMM_LEN];
3885	unsigned int *pid = v;
3886
3887	__trace_find_cmdline(*pid, buf);
3888	seq_printf(m, "%d %s\n", *pid, buf);
3889	return 0;
3890}
3891
3892static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
3893	.start		= saved_cmdlines_start,
3894	.next		= saved_cmdlines_next,
3895	.stop		= saved_cmdlines_stop,
3896	.show		= saved_cmdlines_show,
3897};
3898
3899static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
3900{
3901	if (tracing_disabled)
3902		return -ENODEV;
3903
3904	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
3905}
3906
3907static const struct file_operations tracing_saved_cmdlines_fops = {
3908	.open		= tracing_saved_cmdlines_open,
3909	.read		= seq_read,
3910	.llseek		= seq_lseek,
3911	.release	= seq_release,
3912};
3913
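/*
 * saved_cmdlines_size reports and resizes the comm cache. A minimal
 * usage sketch (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *   # cat saved_cmdlines_size
 *   128
 *   # echo 1024 > saved_cmdlines_size
 *
 * The value shown is only illustrative; the actual default is set when
 * the cmdline buffer is first allocated earlier in this file.
 */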
3914static ssize_t
3915tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
3916				 size_t cnt, loff_t *ppos)
3917{
3918	char buf[64];
3919	int r;
3920
3921	arch_spin_lock(&trace_cmdline_lock);
3922	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
3923	arch_spin_unlock(&trace_cmdline_lock);
3924
3925	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3926}
3927
3928static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
3929{
3930	kfree(s->saved_cmdlines);
3931	kfree(s->map_cmdline_to_pid);
3932	kfree(s);
3933}
3934
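/*
 * Swap in a freshly allocated cmdline cache of @val entries. The new
 * buffer is allocated and initialized outside of trace_cmdline_lock;
 * only the pointer swap is done under the lock, so lookups never see a
 * half-initialized buffer, and the old buffer is freed after the swap.
 */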
3935static int tracing_resize_saved_cmdlines(unsigned int val)
3936{
3937	struct saved_cmdlines_buffer *s, *savedcmd_temp;
3938
3939	s = kmalloc(sizeof(*s), GFP_KERNEL);
3940	if (!s)
3941		return -ENOMEM;
3942
3943	if (allocate_cmdlines_buffer(val, s) < 0) {
3944		kfree(s);
3945		return -ENOMEM;
3946	}
3947
3948	arch_spin_lock(&trace_cmdline_lock);
3949	savedcmd_temp = savedcmd;
3950	savedcmd = s;
3951	arch_spin_unlock(&trace_cmdline_lock);
3952	free_saved_cmdlines_buffer(savedcmd_temp);
3953
3954	return 0;
3955}
3956
3957static ssize_t
3958tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
3959				  size_t cnt, loff_t *ppos)
3960{
3961	unsigned long val;
3962	int ret;
3963
3964	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3965	if (ret)
3966		return ret;
3967
	/* must have at least 1 entry and no more than PID_MAX_DEFAULT entries */
3969	if (!val || val > PID_MAX_DEFAULT)
3970		return -EINVAL;
3971
3972	ret = tracing_resize_saved_cmdlines((unsigned int)val);
3973	if (ret < 0)
3974		return ret;
3975
3976	*ppos += cnt;
3977
3978	return cnt;
3979}
3980
3981static const struct file_operations tracing_saved_cmdlines_size_fops = {
3982	.open		= tracing_open_generic,
3983	.read		= tracing_saved_cmdlines_size_read,
3984	.write		= tracing_saved_cmdlines_size_write,
3985};
3986
3987#ifdef CONFIG_TRACE_ENUM_MAP_FILE
3988static union trace_enum_map_item *
3989update_enum_map(union trace_enum_map_item *ptr)
3990{
3991	if (!ptr->map.enum_string) {
3992		if (ptr->tail.next) {
3993			ptr = ptr->tail.next;
3994			/* Set ptr to the next real item (skip head) */
3995			ptr++;
3996		} else
3997			return NULL;
3998	}
3999	return ptr;
4000}
4001
4002static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4003{
4004	union trace_enum_map_item *ptr = v;
4005
4006	/*
4007	 * Paranoid! If ptr points to end, we don't want to increment past it.
4008	 * This really should never happen.
4009	 */
4010	ptr = update_enum_map(ptr);
4011	if (WARN_ON_ONCE(!ptr))
4012		return NULL;
4013
4014	ptr++;
4015
4016	(*pos)++;
4017
4018	ptr = update_enum_map(ptr);
4019
4020	return ptr;
4021}
4022
4023static void *enum_map_start(struct seq_file *m, loff_t *pos)
4024{
4025	union trace_enum_map_item *v;
4026	loff_t l = 0;
4027
4028	mutex_lock(&trace_enum_mutex);
4029
4030	v = trace_enum_maps;
4031	if (v)
4032		v++;
4033
	while (v && l < *pos)
		v = enum_map_next(m, v, &l);
4037
4038	return v;
4039}
4040
4041static void enum_map_stop(struct seq_file *m, void *v)
4042{
4043	mutex_unlock(&trace_enum_mutex);
4044}
4045
4046static int enum_map_show(struct seq_file *m, void *v)
4047{
4048	union trace_enum_map_item *ptr = v;
4049
4050	seq_printf(m, "%s %ld (%s)\n",
4051		   ptr->map.enum_string, ptr->map.enum_value,
4052		   ptr->map.system);
4053
4054	return 0;
4055}
4056
4057static const struct seq_operations tracing_enum_map_seq_ops = {
4058	.start		= enum_map_start,
4059	.next		= enum_map_next,
4060	.stop		= enum_map_stop,
4061	.show		= enum_map_show,
4062};
4063
4064static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4065{
4066	if (tracing_disabled)
4067		return -ENODEV;
4068
4069	return seq_open(filp, &tracing_enum_map_seq_ops);
4070}
4071
4072static const struct file_operations tracing_enum_map_fops = {
4073	.open		= tracing_enum_map_open,
4074	.read		= seq_read,
4075	.llseek		= seq_lseek,
4076	.release	= seq_release,
4077};
4078
4079static inline union trace_enum_map_item *
4080trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4081{
4082	/* Return tail of array given the head */
4083	return ptr + ptr->head.length + 1;
4084}
4085
4086static void
4087trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4088			   int len)
4089{
4090	struct trace_enum_map **stop;
4091	struct trace_enum_map **map;
4092	union trace_enum_map_item *map_array;
4093	union trace_enum_map_item *ptr;
4094
4095	stop = start + len;
4096
4097	/*
4098	 * The trace_enum_maps contains the map plus a head and tail item,
4099	 * where the head holds the module and length of array, and the
4100	 * tail holds a pointer to the next list.
4101	 */
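	/*
	 * Rough layout for len == 3 (sketch):
	 *
	 *   [ head: mod, length | map 0 | map 1 | map 2 | tail: next ]
	 */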
4102	map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4103	if (!map_array) {
4104		pr_warning("Unable to allocate trace enum mapping\n");
4105		return;
4106	}
4107
4108	mutex_lock(&trace_enum_mutex);
4109
4110	if (!trace_enum_maps)
4111		trace_enum_maps = map_array;
4112	else {
4113		ptr = trace_enum_maps;
4114		for (;;) {
4115			ptr = trace_enum_jmp_to_tail(ptr);
4116			if (!ptr->tail.next)
4117				break;
			ptr = ptr->tail.next;
		}
4121		ptr->tail.next = map_array;
4122	}
4123	map_array->head.mod = mod;
4124	map_array->head.length = len;
4125	map_array++;
4126
4127	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4128		map_array->map = **map;
4129		map_array++;
4130	}
4131	memset(map_array, 0, sizeof(*map_array));
4132
4133	mutex_unlock(&trace_enum_mutex);
4134}
4135
4136static void trace_create_enum_file(struct dentry *d_tracer)
4137{
4138	trace_create_file("enum_map", 0444, d_tracer,
4139			  NULL, &tracing_enum_map_fops);
4140}
4141
4142#else /* CONFIG_TRACE_ENUM_MAP_FILE */
4143static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4144static inline void trace_insert_enum_map_file(struct module *mod,
4145			      struct trace_enum_map **start, int len) { }
4146#endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4147
4148static void trace_insert_enum_map(struct module *mod,
4149				  struct trace_enum_map **start, int len)
4150{
4151	struct trace_enum_map **map;
4152
4153	if (len <= 0)
4154		return;
4155
4156	map = start;
4157
4158	trace_event_enum_update(map, len);
4159
4160	trace_insert_enum_map_file(mod, start, len);
4161}
4162
4163static ssize_t
4164tracing_set_trace_read(struct file *filp, char __user *ubuf,
4165		       size_t cnt, loff_t *ppos)
4166{
4167	struct trace_array *tr = filp->private_data;
4168	char buf[MAX_TRACER_SIZE+2];
4169	int r;
4170
4171	mutex_lock(&trace_types_lock);
4172	r = sprintf(buf, "%s\n", tr->current_trace->name);
4173	mutex_unlock(&trace_types_lock);
4174
4175	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4176}
4177
4178int tracer_init(struct tracer *t, struct trace_array *tr)
4179{
4180	tracing_reset_online_cpus(&tr->trace_buffer);
4181	return t->init(tr);
4182}
4183
4184static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4185{
4186	int cpu;
4187
4188	for_each_tracing_cpu(cpu)
4189		per_cpu_ptr(buf->data, cpu)->entries = val;
4190}
4191
4192#ifdef CONFIG_TRACER_MAX_TRACE
/* resize @trace_buf's per-cpu entries to match @size_buf's entries */
4194static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4195					struct trace_buffer *size_buf, int cpu_id)
4196{
4197	int cpu, ret = 0;
4198
4199	if (cpu_id == RING_BUFFER_ALL_CPUS) {
4200		for_each_tracing_cpu(cpu) {
4201			ret = ring_buffer_resize(trace_buf->buffer,
4202				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4203			if (ret < 0)
4204				break;
4205			per_cpu_ptr(trace_buf->data, cpu)->entries =
4206				per_cpu_ptr(size_buf->data, cpu)->entries;
4207		}
4208	} else {
4209		ret = ring_buffer_resize(trace_buf->buffer,
4210				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4211		if (ret == 0)
4212			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4213				per_cpu_ptr(size_buf->data, cpu_id)->entries;
4214	}
4215
4216	return ret;
4217}
4218#endif /* CONFIG_TRACER_MAX_TRACE */
4219
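/*
 * Resize the ring buffer of @tr to @size bytes for @cpu (or for all
 * CPUs when @cpu is RING_BUFFER_ALL_CPUS). On the global trace array,
 * tracers that use the max buffer get that buffer resized as well; if
 * resizing the max buffer fails, the main buffer is resized back so the
 * two buffers stay the same size.
 */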
4220static int __tracing_resize_ring_buffer(struct trace_array *tr,
4221					unsigned long size, int cpu)
4222{
4223	int ret;
4224
4225	/*
4226	 * If kernel or user changes the size of the ring buffer
4227	 * we use the size that was given, and we can forget about
4228	 * expanding it later.
4229	 */
4230	ring_buffer_expanded = true;
4231
4232	/* May be called before buffers are initialized */
4233	if (!tr->trace_buffer.buffer)
4234		return 0;
4235
4236	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4237	if (ret < 0)
4238		return ret;
4239
4240#ifdef CONFIG_TRACER_MAX_TRACE
4241	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4242	    !tr->current_trace->use_max_tr)
4243		goto out;
4244
4245	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4246	if (ret < 0) {
4247		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4248						     &tr->trace_buffer, cpu);
4249		if (r < 0) {
4250			/*
4251			 * AARGH! We are left with different
4252			 * size max buffer!!!!
4253			 * The max buffer is our "snapshot" buffer.
4254			 * When a tracer needs a snapshot (one of the
4255			 * latency tracers), it swaps the max buffer
			 * with the saved snapshot. We succeeded in
			 * updating the size of the main buffer, but failed to
4258			 * update the size of the max buffer. But when we tried
4259			 * to reset the main buffer to the original size, we
4260			 * failed there too. This is very unlikely to
4261			 * happen, but if it does, warn and kill all
4262			 * tracing.
4263			 */
4264			WARN_ON(1);
4265			tracing_disabled = 1;
4266		}
4267		return ret;
4268	}
4269
4270	if (cpu == RING_BUFFER_ALL_CPUS)
4271		set_buffer_entries(&tr->max_buffer, size);
4272	else
4273		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4274
4275 out:
4276#endif /* CONFIG_TRACER_MAX_TRACE */
4277
4278	if (cpu == RING_BUFFER_ALL_CPUS)
4279		set_buffer_entries(&tr->trace_buffer, size);
4280	else
4281		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4282
4283	return ret;
4284}
4285
4286static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4287					  unsigned long size, int cpu_id)
4288{
4289	int ret = size;
4290
4291	mutex_lock(&trace_types_lock);
4292
4293	if (cpu_id != RING_BUFFER_ALL_CPUS) {
		/* make sure this cpu is enabled in the mask */
4295		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4296			ret = -EINVAL;
4297			goto out;
4298		}
4299	}
4300
4301	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4302	if (ret < 0)
4303		ret = -ENOMEM;
4304
4305out:
4306	mutex_unlock(&trace_types_lock);
4307
4308	return ret;
4309}
4310
4311
4312/**
4313 * tracing_update_buffers - used by tracing facility to expand ring buffers
4314 *
 * To save memory when tracing is never used on a system that has it
 * configured in, the ring buffers are set to a minimum size. Once a
 * user starts to use the tracing facility, they need to grow to their
 * default size.
4319 *
4320 * This function is to be called when a tracer is about to be used.
4321 */
4322int tracing_update_buffers(void)
4323{
4324	int ret = 0;
4325
4326	mutex_lock(&trace_types_lock);
4327	if (!ring_buffer_expanded)
4328		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4329						RING_BUFFER_ALL_CPUS);
4330	mutex_unlock(&trace_types_lock);
4331
4332	return ret;
4333}
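/*
 * Typical call pattern (sketch): any path that may start tracing calls
 * this before it touches the buffers, e.g.
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 */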
4334
4335struct trace_option_dentry;
4336
4337static void
4338create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4339
4340/*
4341 * Used to clear out the tracer before deletion of an instance.
4342 * Must have trace_types_lock held.
4343 */
4344static void tracing_set_nop(struct trace_array *tr)
4345{
4346	if (tr->current_trace == &nop_trace)
4347		return;
4348
4349	tr->current_trace->enabled--;
4350
4351	if (tr->current_trace->reset)
4352		tr->current_trace->reset(tr);
4353
4354	tr->current_trace = &nop_trace;
4355}
4356
4357static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4358{
4359	/* Only enable if the directory has been created already. */
4360	if (!tr->dir)
4361		return;
4362
4363	create_trace_option_files(tr, t);
4364}
4365
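/*
 * Switch @tr to the tracer named @buf: expand the ring buffer if it has
 * not been expanded yet, look the name up in trace_types, tear down the
 * current tracer (reset + nop_trace), adjust the snapshot buffer for
 * tracers that use max_tr, then init and enable the new tracer.
 */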
4366static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4367{
4368	struct tracer *t;
4369#ifdef CONFIG_TRACER_MAX_TRACE
4370	bool had_max_tr;
4371#endif
4372	int ret = 0;
4373
4374	mutex_lock(&trace_types_lock);
4375
4376	if (!ring_buffer_expanded) {
4377		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4378						RING_BUFFER_ALL_CPUS);
4379		if (ret < 0)
4380			goto out;
4381		ret = 0;
4382	}
4383
4384	for (t = trace_types; t; t = t->next) {
4385		if (strcmp(t->name, buf) == 0)
4386			break;
4387	}
4388	if (!t) {
4389		ret = -EINVAL;
4390		goto out;
4391	}
4392	if (t == tr->current_trace)
4393		goto out;
4394
4395	/* Some tracers are only allowed for the top level buffer */
4396	if (!trace_ok_for_array(t, tr)) {
4397		ret = -EINVAL;
4398		goto out;
4399	}
4400
4401	/* If trace pipe files are being read, we can't change the tracer */
4402	if (tr->current_trace->ref) {
4403		ret = -EBUSY;
4404		goto out;
4405	}
4406
4407	trace_branch_disable();
4408
4409	tr->current_trace->enabled--;
4410
4411	if (tr->current_trace->reset)
4412		tr->current_trace->reset(tr);
4413
4414	/* Current trace needs to be nop_trace before synchronize_sched */
4415	tr->current_trace = &nop_trace;
4416
4417#ifdef CONFIG_TRACER_MAX_TRACE
4418	had_max_tr = tr->allocated_snapshot;
4419
4420	if (had_max_tr && !t->use_max_tr) {
4421		/*
4422		 * We need to make sure that the update_max_tr sees that
4423		 * current_trace changed to nop_trace to keep it from
4424		 * swapping the buffers after we resize it.
		 * The update_max_tr is called with interrupts disabled
		 * so a synchronize_sched() is sufficient.
4427		 */
4428		synchronize_sched();
4429		free_snapshot(tr);
4430	}
4431#endif
4432
4433#ifdef CONFIG_TRACER_MAX_TRACE
4434	if (t->use_max_tr && !had_max_tr) {
4435		ret = alloc_snapshot(tr);
4436		if (ret < 0)
4437			goto out;
4438	}
4439#endif
4440
4441	if (t->init) {
4442		ret = tracer_init(t, tr);
4443		if (ret)
4444			goto out;
4445	}
4446
4447	tr->current_trace = t;
4448	tr->current_trace->enabled++;
4449	trace_branch_enable(tr);
4450 out:
4451	mutex_unlock(&trace_types_lock);
4452
4453	return ret;
4454}
4455
4456static ssize_t
4457tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4458			size_t cnt, loff_t *ppos)
4459{
4460	struct trace_array *tr = filp->private_data;
4461	char buf[MAX_TRACER_SIZE+1];
4462	int i;
4463	size_t ret;
4464	int err;
4465
4466	ret = cnt;
4467
4468	if (cnt > MAX_TRACER_SIZE)
4469		cnt = MAX_TRACER_SIZE;
4470
4471	if (copy_from_user(&buf, ubuf, cnt))
4472		return -EFAULT;
4473
4474	buf[cnt] = 0;
4475
4476	/* strip ending whitespace. */
4477	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4478		buf[i] = 0;
4479
4480	err = tracing_set_tracer(tr, buf);
4481	if (err)
4482		return err;
4483
4484	*ppos += ret;
4485
4486	return ret;
4487}
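/*
 * Usage sketch for the file backed by the read/write handlers above
 * (assuming it is wired up as current_tracer and tracefs is mounted at
 * /sys/kernel/tracing):
 *
 *   # echo function > current_tracer
 *   # cat current_tracer
 *   function
 */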
4488
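/*
 * The latency/threshold values below are kept in nanoseconds internally
 * but exposed to user space in microseconds: reads convert with
 * nsecs_to_usecs(), writes multiply the user value by 1000.
 */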
4489static ssize_t
4490tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
4491		   size_t cnt, loff_t *ppos)
4492{
4493	char buf[64];
4494	int r;
4495
4496	r = snprintf(buf, sizeof(buf), "%ld\n",
4497		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4498	if (r > sizeof(buf))
4499		r = sizeof(buf);
4500	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4501}
4502
4503static ssize_t
4504tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
4505		    size_t cnt, loff_t *ppos)
4506{
4507	unsigned long val;
4508	int ret;
4509
4510	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4511	if (ret)
4512		return ret;
4513
4514	*ptr = val * 1000;
4515
4516	return cnt;
4517}
4518
4519static ssize_t
4520tracing_thresh_read(struct file *filp, char __user *ubuf,
4521		    size_t cnt, loff_t *ppos)
4522{
4523	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
4524}
4525
4526static ssize_t
4527tracing_thresh_write(struct file *filp, const char __user *ubuf,
4528		     size_t cnt, loff_t *ppos)
4529{
4530	struct trace_array *tr = filp->private_data;
4531	int ret;
4532
4533	mutex_lock(&trace_types_lock);
4534	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
4535	if (ret < 0)
4536		goto out;
4537
4538	if (tr->current_trace->update_thresh) {
4539		ret = tr->current_trace->update_thresh(tr);
4540		if (ret < 0)
4541			goto out;
4542	}
4543
4544	ret = cnt;
4545out:
4546	mutex_unlock(&trace_types_lock);
4547
4548	return ret;
4549}
4550
4551#ifdef CONFIG_TRACER_MAX_TRACE
4552
4553static ssize_t
4554tracing_max_lat_read(struct file *filp, char __user *ubuf,
4555		     size_t cnt, loff_t *ppos)
4556{
4557	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
4558}
4559
4560static ssize_t
4561tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4562		      size_t cnt, loff_t *ppos)
4563{
4564	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
4565}
4566
4567#endif
4568
4569static int tracing_open_pipe(struct inode *inode, struct file *filp)
4570{
4571	struct trace_array *tr = inode->i_private;
4572	struct trace_iterator *iter;
4573	int ret = 0;
4574
4575	if (tracing_disabled)
4576		return -ENODEV;
4577
4578	if (trace_array_get(tr) < 0)
4579		return -ENODEV;
4580
4581	mutex_lock(&trace_types_lock);
4582
4583	/* create a buffer to store the information to pass to userspace */
4584	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4585	if (!iter) {
4586		ret = -ENOMEM;
4587		__trace_array_put(tr);
4588		goto out;
4589	}
4590
4591	trace_seq_init(&iter->seq);
4592	iter->trace = tr->current_trace;
4593
4594	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4595		ret = -ENOMEM;
4596		goto fail;
4597	}
4598
4599	/* trace pipe does not show start of buffer */
4600	cpumask_setall(iter->started);
4601
4602	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4603		iter->iter_flags |= TRACE_FILE_LAT_FMT;
4604
4605	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4606	if (trace_clocks[tr->clock_id].in_ns)
4607		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4608
4609	iter->tr = tr;
4610	iter->trace_buffer = &tr->trace_buffer;
4611	iter->cpu_file = tracing_get_cpu(inode);
4612	mutex_init(&iter->mutex);
4613	filp->private_data = iter;
4614
4615	if (iter->trace->pipe_open)
4616		iter->trace->pipe_open(iter);
4617
4618	nonseekable_open(inode, filp);
4619
4620	tr->current_trace->ref++;
4621out:
4622	mutex_unlock(&trace_types_lock);
4623	return ret;
4624
4625fail:
4626	kfree(iter->trace);
4627	kfree(iter);
4628	__trace_array_put(tr);
4629	mutex_unlock(&trace_types_lock);
4630	return ret;
4631}
4632
4633static int tracing_release_pipe(struct inode *inode, struct file *file)
4634{
4635	struct trace_iterator *iter = file->private_data;
4636	struct trace_array *tr = inode->i_private;
4637
4638	mutex_lock(&trace_types_lock);
4639
4640	tr->current_trace->ref--;
4641
4642	if (iter->trace->pipe_close)
4643		iter->trace->pipe_close(iter);
4644
4645	mutex_unlock(&trace_types_lock);
4646
4647	free_cpumask_var(iter->started);
4648	mutex_destroy(&iter->mutex);
4649	kfree(iter);
4650
4651	trace_array_put(tr);
4652
4653	return 0;
4654}
4655
4656static unsigned int
4657trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4658{
4659	struct trace_array *tr = iter->tr;
4660
4661	/* Iterators are static, they should be filled or empty */
4662	if (trace_buffer_iter(iter, iter->cpu_file))
4663		return POLLIN | POLLRDNORM;
4664
4665	if (tr->trace_flags & TRACE_ITER_BLOCK)
4666		/*
4667		 * Always select as readable when in blocking mode
4668		 */
4669		return POLLIN | POLLRDNORM;
4670	else
4671		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4672					     filp, poll_table);
4673}
4674
4675static unsigned int
4676tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4677{
4678	struct trace_iterator *iter = filp->private_data;
4679
4680	return trace_poll(iter, filp, poll_table);
4681}
4682
4683/* Must be called with iter->mutex held. */
4684static int tracing_wait_pipe(struct file *filp)
4685{
4686	struct trace_iterator *iter = filp->private_data;
4687	int ret;
4688
4689	while (trace_empty(iter)) {
4690
		if ((filp->f_flags & O_NONBLOCK))
			return -EAGAIN;
4694
4695		/*
		 * We only return EOF once we have read something and tracing
		 * has been disabled. If tracing is disabled but we have never
		 * read anything, we keep blocking; this allows a user to cat
		 * this file, and then enable tracing. But after we have read
		 * something, we give an EOF when tracing is again disabled.
4701		 *
4702		 * iter->pos will be 0 if we haven't read anything.
4703		 */
4704		if (!tracing_is_on() && iter->pos)
4705			break;
4706
4707		mutex_unlock(&iter->mutex);
4708
4709		ret = wait_on_pipe(iter, false);
4710
4711		mutex_lock(&iter->mutex);
4712
4713		if (ret)
4714			return ret;
4715	}
4716
4717	return 1;
4718}
4719
4720/*
4721 * Consumer reader.
4722 */
4723static ssize_t
4724tracing_read_pipe(struct file *filp, char __user *ubuf,
4725		  size_t cnt, loff_t *ppos)
4726{
4727	struct trace_iterator *iter = filp->private_data;
4728	ssize_t sret;
4729
4730	/* return any leftover data */
4731	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4732	if (sret != -EBUSY)
4733		return sret;
4734
4735	trace_seq_init(&iter->seq);
4736
4737	/*
4738	 * Avoid more than one consumer on a single file descriptor
4739	 * This is just a matter of traces coherency, the ring buffer itself
4740	 * is protected.
4741	 */
4742	mutex_lock(&iter->mutex);
4743	if (iter->trace->read) {
4744		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4745		if (sret)
4746			goto out;
4747	}
4748
4749waitagain:
4750	sret = tracing_wait_pipe(filp);
4751	if (sret <= 0)
4752		goto out;
4753
4754	/* stop when tracing is finished */
4755	if (trace_empty(iter)) {
4756		sret = 0;
4757		goto out;
4758	}
4759
4760	if (cnt >= PAGE_SIZE)
4761		cnt = PAGE_SIZE - 1;
4762
4763	/* reset all but tr, trace, and overruns */
4764	memset(&iter->seq, 0,
4765	       sizeof(struct trace_iterator) -
4766	       offsetof(struct trace_iterator, seq));
4767	cpumask_clear(iter->started);
4768	iter->pos = -1;
4769
4770	trace_event_read_lock();
4771	trace_access_lock(iter->cpu_file);
4772	while (trace_find_next_entry_inc(iter) != NULL) {
4773		enum print_line_t ret;
4774		int save_len = iter->seq.seq.len;
4775
4776		ret = print_trace_line(iter);
4777		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4778			/* don't print partial lines */
4779			iter->seq.seq.len = save_len;
4780			break;
4781		}
4782		if (ret != TRACE_TYPE_NO_CONSUME)
4783			trace_consume(iter);
4784
4785		if (trace_seq_used(&iter->seq) >= cnt)
4786			break;
4787
4788		/*
4789		 * Setting the full flag means we reached the trace_seq buffer
4790		 * size and we should leave by partial output condition above.
4791		 * One of the trace_seq_* functions is not used properly.
4792		 */
4793		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4794			  iter->ent->type);
4795	}
4796	trace_access_unlock(iter->cpu_file);
4797	trace_event_read_unlock();
4798
4799	/* Now copy what we have to the user */
4800	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4801	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
4802		trace_seq_init(&iter->seq);
4803
4804	/*
4805	 * If there was nothing to send to user, in spite of consuming trace
4806	 * entries, go back to wait for more entries.
4807	 */
4808	if (sret == -EBUSY)
4809		goto waitagain;
4810
4811out:
4812	mutex_unlock(&iter->mutex);
4813
4814	return sret;
4815}
4816
4817static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4818				     unsigned int idx)
4819{
4820	__free_page(spd->pages[idx]);
4821}
4822
4823static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4824	.can_merge		= 0,
4825	.confirm		= generic_pipe_buf_confirm,
4826	.release		= generic_pipe_buf_release,
4827	.steal			= generic_pipe_buf_steal,
4828	.get			= generic_pipe_buf_get,
4829};
4830
4831static size_t
4832tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4833{
4834	size_t count;
4835	int save_len;
4836	int ret;
4837
4838	/* Seq buffer is page-sized, exactly what we need. */
4839	for (;;) {
4840		save_len = iter->seq.seq.len;
4841		ret = print_trace_line(iter);
4842
4843		if (trace_seq_has_overflowed(&iter->seq)) {
4844			iter->seq.seq.len = save_len;
4845			break;
4846		}
4847
4848		/*
4849		 * This should not be hit, because it should only
4850		 * be set if the iter->seq overflowed. But check it
4851		 * anyway to be safe.
4852		 */
4853		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4854			iter->seq.seq.len = save_len;
4855			break;
4856		}
4857
4858		count = trace_seq_used(&iter->seq) - save_len;
4859		if (rem < count) {
4860			rem = 0;
4861			iter->seq.seq.len = save_len;
4862			break;
4863		}
4864
4865		if (ret != TRACE_TYPE_NO_CONSUME)
4866			trace_consume(iter);
4867		rem -= count;
4868		if (!trace_find_next_entry_inc(iter))	{
4869			rem = 0;
4870			iter->ent = NULL;
4871			break;
4872		}
4873	}
4874
4875	return rem;
4876}
4877
4878static ssize_t tracing_splice_read_pipe(struct file *filp,
4879					loff_t *ppos,
4880					struct pipe_inode_info *pipe,
4881					size_t len,
4882					unsigned int flags)
4883{
4884	struct page *pages_def[PIPE_DEF_BUFFERS];
4885	struct partial_page partial_def[PIPE_DEF_BUFFERS];
4886	struct trace_iterator *iter = filp->private_data;
4887	struct splice_pipe_desc spd = {
4888		.pages		= pages_def,
4889		.partial	= partial_def,
4890		.nr_pages	= 0, /* This gets updated below. */
4891		.nr_pages_max	= PIPE_DEF_BUFFERS,
4892		.flags		= flags,
4893		.ops		= &tracing_pipe_buf_ops,
4894		.spd_release	= tracing_spd_release_pipe,
4895	};
4896	ssize_t ret;
4897	size_t rem;
4898	unsigned int i;
4899
4900	if (splice_grow_spd(pipe, &spd))
4901		return -ENOMEM;
4902
4903	mutex_lock(&iter->mutex);
4904
4905	if (iter->trace->splice_read) {
4906		ret = iter->trace->splice_read(iter, filp,
4907					       ppos, pipe, len, flags);
4908		if (ret)
4909			goto out_err;
4910	}
4911
4912	ret = tracing_wait_pipe(filp);
4913	if (ret <= 0)
4914		goto out_err;
4915
4916	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4917		ret = -EFAULT;
4918		goto out_err;
4919	}
4920
4921	trace_event_read_lock();
4922	trace_access_lock(iter->cpu_file);
4923
4924	/* Fill as many pages as possible. */
4925	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
4926		spd.pages[i] = alloc_page(GFP_KERNEL);
4927		if (!spd.pages[i])
4928			break;
4929
4930		rem = tracing_fill_pipe_page(rem, iter);
4931
4932		/* Copy the data into the page, so we can start over. */
4933		ret = trace_seq_to_buffer(&iter->seq,
4934					  page_address(spd.pages[i]),
4935					  trace_seq_used(&iter->seq));
4936		if (ret < 0) {
4937			__free_page(spd.pages[i]);
4938			break;
4939		}
4940		spd.partial[i].offset = 0;
4941		spd.partial[i].len = trace_seq_used(&iter->seq);
4942
4943		trace_seq_init(&iter->seq);
4944	}
4945
4946	trace_access_unlock(iter->cpu_file);
4947	trace_event_read_unlock();
4948	mutex_unlock(&iter->mutex);
4949
4950	spd.nr_pages = i;
4951
4952	if (i)
4953		ret = splice_to_pipe(pipe, &spd);
4954	else
4955		ret = 0;
4956out:
4957	splice_shrink_spd(&spd);
4958	return ret;
4959
4960out_err:
4961	mutex_unlock(&iter->mutex);
4962	goto out;
4963}
4964
4965static ssize_t
4966tracing_entries_read(struct file *filp, char __user *ubuf,
4967		     size_t cnt, loff_t *ppos)
4968{
4969	struct inode *inode = file_inode(filp);
4970	struct trace_array *tr = inode->i_private;
4971	int cpu = tracing_get_cpu(inode);
4972	char buf[64];
4973	int r = 0;
4974	ssize_t ret;
4975
4976	mutex_lock(&trace_types_lock);
4977
4978	if (cpu == RING_BUFFER_ALL_CPUS) {
4979		int cpu, buf_size_same;
4980		unsigned long size;
4981
4982		size = 0;
4983		buf_size_same = 1;
4984		/* check if all cpu sizes are same */
4985		for_each_tracing_cpu(cpu) {
4986			/* fill in the size from first enabled cpu */
4987			if (size == 0)
4988				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4989			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4990				buf_size_same = 0;
4991				break;
4992			}
4993		}
4994
4995		if (buf_size_same) {
4996			if (!ring_buffer_expanded)
4997				r = sprintf(buf, "%lu (expanded: %lu)\n",
4998					    size >> 10,
4999					    trace_buf_size >> 10);
5000			else
5001				r = sprintf(buf, "%lu\n", size >> 10);
5002		} else
5003			r = sprintf(buf, "X\n");
5004	} else
5005		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5006
5007	mutex_unlock(&trace_types_lock);
5008
5009	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5010	return ret;
5011}
5012
5013static ssize_t
5014tracing_entries_write(struct file *filp, const char __user *ubuf,
5015		      size_t cnt, loff_t *ppos)
5016{
5017	struct inode *inode = file_inode(filp);
5018	struct trace_array *tr = inode->i_private;
5019	unsigned long val;
5020	int ret;
5021
5022	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5023	if (ret)
5024		return ret;
5025
5026	/* must have at least 1 entry */
5027	if (!val)
5028		return -EINVAL;
5029
5030	/* value is in KB */
5031	val <<= 10;
5032	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5033	if (ret < 0)
5034		return ret;
5035
5036	*ppos += cnt;
5037
5038	return cnt;
5039}
5040
5041static ssize_t
5042tracing_total_entries_read(struct file *filp, char __user *ubuf,
5043				size_t cnt, loff_t *ppos)
5044{
5045	struct trace_array *tr = filp->private_data;
5046	char buf[64];
5047	int r, cpu;
5048	unsigned long size = 0, expanded_size = 0;
5049
5050	mutex_lock(&trace_types_lock);
5051	for_each_tracing_cpu(cpu) {
5052		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5053		if (!ring_buffer_expanded)
5054			expanded_size += trace_buf_size >> 10;
5055	}
5056	if (ring_buffer_expanded)
5057		r = sprintf(buf, "%lu\n", size);
5058	else
5059		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5060	mutex_unlock(&trace_types_lock);
5061
5062	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5063}
5064
5065static ssize_t
5066tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5067			  size_t cnt, loff_t *ppos)
5068{
5069	/*
	 * There is no need to read what the user has written; this function
	 * exists only so that "echo" to this file does not return an error
5072	 */
5073
5074	*ppos += cnt;
5075
5076	return cnt;
5077}
5078
5079static int
5080tracing_free_buffer_release(struct inode *inode, struct file *filp)
5081{
5082	struct trace_array *tr = inode->i_private;
5083
5084	/* disable tracing ? */
5085	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5086		tracer_tracing_off(tr);
5087	/* resize the ring buffer to 0 */
5088	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5089
5090	trace_array_put(tr);
5091
5092	return 0;
5093}
5094
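/*
 * Write handler for the trace_marker file (the name is assumed here; the
 * file itself is created elsewhere in this file). User space can inject
 * a line of text into the trace, e.g. (sketch):
 *
 *   # echo "hello from user space" > trace_marker
 */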
5095static ssize_t
5096tracing_mark_write(struct file *filp, const char __user *ubuf,
5097					size_t cnt, loff_t *fpos)
5098{
5099	unsigned long addr = (unsigned long)ubuf;
5100	struct trace_array *tr = filp->private_data;
5101	struct ring_buffer_event *event;
5102	struct ring_buffer *buffer;
5103	struct print_entry *entry;
5104	unsigned long irq_flags;
5105	struct page *pages[2];
5106	void *map_page[2];
5107	int nr_pages = 1;
5108	ssize_t written;
5109	int offset;
5110	int size;
5111	int len;
5112	int ret;
5113	int i;
5114
5115	if (tracing_disabled)
5116		return -EINVAL;
5117
5118	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5119		return -EINVAL;
5120
5121	if (cnt > TRACE_BUF_SIZE)
5122		cnt = TRACE_BUF_SIZE;
5123
5124	/*
5125	 * Userspace is injecting traces into the kernel trace buffer.
5126	 * We want to be as non intrusive as possible.
5127	 * To do so, we do not want to allocate any special buffers
5128	 * or take any locks, but instead write the userspace data
5129	 * straight into the ring buffer.
5130	 *
	 * First we need to pin the userspace buffer into memory.
	 * It most likely already is, because user space just referenced it,
	 * but there's no guarantee. By using get_user_pages_fast()
5134	 * and kmap_atomic/kunmap_atomic() we can get access to the
5135	 * pages directly. We then write the data directly into the
5136	 * ring buffer.
5137	 */
5138	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5139
5140	/* check if we cross pages */
5141	if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
5142		nr_pages = 2;
5143
5144	offset = addr & (PAGE_SIZE - 1);
5145	addr &= PAGE_MASK;
5146
5147	ret = get_user_pages_fast(addr, nr_pages, 0, pages);
5148	if (ret < nr_pages) {
5149		while (--ret >= 0)
5150			put_page(pages[ret]);
5151		written = -EFAULT;
5152		goto out;
5153	}
5154
5155	for (i = 0; i < nr_pages; i++)
5156		map_page[i] = kmap_atomic(pages[i]);
5157
5158	local_save_flags(irq_flags);
5159	size = sizeof(*entry) + cnt + 2; /* possible \n added */
5160	buffer = tr->trace_buffer.buffer;
5161	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5162					  irq_flags, preempt_count());
5163	if (!event) {
5164		/* Ring buffer disabled, return as if not open for write */
5165		written = -EBADF;
5166		goto out_unlock;
5167	}
5168
5169	entry = ring_buffer_event_data(event);
5170	entry->ip = _THIS_IP_;
5171
5172	if (nr_pages == 2) {
5173		len = PAGE_SIZE - offset;
5174		memcpy(&entry->buf, map_page[0] + offset, len);
5175		memcpy(&entry->buf[len], map_page[1], cnt - len);
5176	} else
5177		memcpy(&entry->buf, map_page[0] + offset, cnt);
5178
5179	if (entry->buf[cnt - 1] != '\n') {
5180		entry->buf[cnt] = '\n';
5181		entry->buf[cnt + 1] = '\0';
5182	} else
5183		entry->buf[cnt] = '\0';
5184
5185	__buffer_unlock_commit(buffer, event);
5186
5187	written = cnt;
5188
5189	*fpos += written;
5190
5191 out_unlock:
5192	for (i = nr_pages - 1; i >= 0; i--) {
5193		kunmap_atomic(map_page[i]);
5194		put_page(pages[i]);
5195	}
5196 out:
5197	return written;
5198}
5199
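/*
 * The trace_clock file lists every available clock with the current one
 * in brackets, e.g. (output sketch, actual clock names come from
 * trace_clocks[]):
 *
 *   [local] global counter uptime perf
 */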
5200static int tracing_clock_show(struct seq_file *m, void *v)
5201{
5202	struct trace_array *tr = m->private;
5203	int i;
5204
5205	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5206		seq_printf(m,
5207			"%s%s%s%s", i ? " " : "",
5208			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5209			i == tr->clock_id ? "]" : "");
5210	seq_putc(m, '\n');
5211
5212	return 0;
5213}
5214
5215static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5216{
5217	int i;
5218
5219	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5220		if (strcmp(trace_clocks[i].name, clockstr) == 0)
5221			break;
5222	}
5223	if (i == ARRAY_SIZE(trace_clocks))
5224		return -EINVAL;
5225
5226	mutex_lock(&trace_types_lock);
5227
5228	tr->clock_id = i;
5229
5230	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5231
5232	/*
5233	 * New clock may not be consistent with the previous clock.
5234	 * Reset the buffer so that it doesn't have incomparable timestamps.
5235	 */
5236	tracing_reset_online_cpus(&tr->trace_buffer);
5237
5238#ifdef CONFIG_TRACER_MAX_TRACE
5239	if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5240		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5241	tracing_reset_online_cpus(&tr->max_buffer);
5242#endif
5243
5244	mutex_unlock(&trace_types_lock);
5245
5246	return 0;
5247}
5248
5249static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5250				   size_t cnt, loff_t *fpos)
5251{
5252	struct seq_file *m = filp->private_data;
5253	struct trace_array *tr = m->private;
5254	char buf[64];
5255	const char *clockstr;
5256	int ret;
5257
5258	if (cnt >= sizeof(buf))
5259		return -EINVAL;
5260
5261	if (copy_from_user(&buf, ubuf, cnt))
5262		return -EFAULT;
5263
5264	buf[cnt] = 0;
5265
5266	clockstr = strstrip(buf);
5267
5268	ret = tracing_set_clock(tr, clockstr);
5269	if (ret)
5270		return ret;
5271
5272	*fpos += cnt;
5273
5274	return cnt;
5275}
5276
5277static int tracing_clock_open(struct inode *inode, struct file *file)
5278{
5279	struct trace_array *tr = inode->i_private;
5280	int ret;
5281
5282	if (tracing_disabled)
5283		return -ENODEV;
5284
5285	if (trace_array_get(tr))
5286		return -ENODEV;
5287
5288	ret = single_open(file, tracing_clock_show, inode->i_private);
5289	if (ret < 0)
5290		trace_array_put(tr);
5291
5292	return ret;
5293}
5294
5295struct ftrace_buffer_info {
5296	struct trace_iterator	iter;
5297	void			*spare;
5298	unsigned int		read;
5299};
5300
5301#ifdef CONFIG_TRACER_SNAPSHOT
5302static int tracing_snapshot_open(struct inode *inode, struct file *file)
5303{
5304	struct trace_array *tr = inode->i_private;
5305	struct trace_iterator *iter;
5306	struct seq_file *m;
5307	int ret = 0;
5308
5309	if (trace_array_get(tr) < 0)
5310		return -ENODEV;
5311
5312	if (file->f_mode & FMODE_READ) {
5313		iter = __tracing_open(inode, file, true);
5314		if (IS_ERR(iter))
5315			ret = PTR_ERR(iter);
5316	} else {
5317		/* Writes still need the seq_file to hold the private data */
5318		ret = -ENOMEM;
5319		m = kzalloc(sizeof(*m), GFP_KERNEL);
5320		if (!m)
5321			goto out;
5322		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5323		if (!iter) {
5324			kfree(m);
5325			goto out;
5326		}
5327		ret = 0;
5328
5329		iter->tr = tr;
5330		iter->trace_buffer = &tr->max_buffer;
5331		iter->cpu_file = tracing_get_cpu(inode);
5332		m->private = iter;
5333		file->private_data = m;
5334	}
5335out:
5336	if (ret < 0)
5337		trace_array_put(tr);
5338
5339	return ret;
5340}
5341
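/*
 * Values written to the snapshot file:
 *   0 - free the snapshot buffer (whole buffer only, not per cpu)
 *   1 - allocate the snapshot buffer if needed and swap it with the
 *       live buffer (per cpu only if the ring buffer allows swapping)
 *   * - any other value clears the snapshot buffer without freeing it
 */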
5342static ssize_t
5343tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5344		       loff_t *ppos)
5345{
5346	struct seq_file *m = filp->private_data;
5347	struct trace_iterator *iter = m->private;
5348	struct trace_array *tr = iter->tr;
5349	unsigned long val;
5350	int ret;
5351
5352	ret = tracing_update_buffers();
5353	if (ret < 0)
5354		return ret;
5355
5356	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5357	if (ret)
5358		return ret;
5359
5360	mutex_lock(&trace_types_lock);
5361
5362	if (tr->current_trace->use_max_tr) {
5363		ret = -EBUSY;
5364		goto out;
5365	}
5366
5367	switch (val) {
5368	case 0:
5369		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5370			ret = -EINVAL;
5371			break;
5372		}
5373		if (tr->allocated_snapshot)
5374			free_snapshot(tr);
5375		break;
5376	case 1:
5377/* Only allow per-cpu swap if the ring buffer supports it */
5378#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5379		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5380			ret = -EINVAL;
5381			break;
5382		}
5383#endif
5384		if (!tr->allocated_snapshot) {
5385			ret = alloc_snapshot(tr);
5386			if (ret < 0)
5387				break;
5388		}
5389		local_irq_disable();
5390		/* Now, we're going to swap */
5391		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5392			update_max_tr(tr, current, smp_processor_id());
5393		else
5394			update_max_tr_single(tr, current, iter->cpu_file);
5395		local_irq_enable();
5396		break;
5397	default:
5398		if (tr->allocated_snapshot) {
5399			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5400				tracing_reset_online_cpus(&tr->max_buffer);
5401			else
5402				tracing_reset(&tr->max_buffer, iter->cpu_file);
5403		}
5404		break;
5405	}
5406
5407	if (ret >= 0) {
5408		*ppos += cnt;
5409		ret = cnt;
5410	}
5411out:
5412	mutex_unlock(&trace_types_lock);
5413	return ret;
5414}
5415
5416static int tracing_snapshot_release(struct inode *inode, struct file *file)
5417{
5418	struct seq_file *m = file->private_data;
5419	int ret;
5420
5421	ret = tracing_release(inode, file);
5422
5423	if (file->f_mode & FMODE_READ)
5424		return ret;
5425
5426	/* If write only, the seq_file is just a stub */
5427	if (m)
5428		kfree(m->private);
5429	kfree(m);
5430
5431	return 0;
5432}
5433
5434static int tracing_buffers_open(struct inode *inode, struct file *filp);
5435static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5436				    size_t count, loff_t *ppos);
5437static int tracing_buffers_release(struct inode *inode, struct file *file);
5438static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5439		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5440
5441static int snapshot_raw_open(struct inode *inode, struct file *filp)
5442{
5443	struct ftrace_buffer_info *info;
5444	int ret;
5445
5446	ret = tracing_buffers_open(inode, filp);
5447	if (ret < 0)
5448		return ret;
5449
5450	info = filp->private_data;
5451
5452	if (info->iter.trace->use_max_tr) {
5453		tracing_buffers_release(inode, filp);
5454		return -EBUSY;
5455	}
5456
5457	info->iter.snapshot = true;
5458	info->iter.trace_buffer = &info->iter.tr->max_buffer;
5459
5460	return ret;
5461}
5462
5463#endif /* CONFIG_TRACER_SNAPSHOT */
5464
5465
5466static const struct file_operations tracing_thresh_fops = {
5467	.open		= tracing_open_generic,
5468	.read		= tracing_thresh_read,
5469	.write		= tracing_thresh_write,
5470	.llseek		= generic_file_llseek,
5471};
5472
5473#ifdef CONFIG_TRACER_MAX_TRACE
5474static const struct file_operations tracing_max_lat_fops = {
5475	.open		= tracing_open_generic,
5476	.read		= tracing_max_lat_read,
5477	.write		= tracing_max_lat_write,
5478	.llseek		= generic_file_llseek,
5479};
5480#endif
5481
5482static const struct file_operations set_tracer_fops = {
5483	.open		= tracing_open_generic,
5484	.read		= tracing_set_trace_read,
5485	.write		= tracing_set_trace_write,
5486	.llseek		= generic_file_llseek,
5487};
5488
5489static const struct file_operations tracing_pipe_fops = {
5490	.open		= tracing_open_pipe,
5491	.poll		= tracing_poll_pipe,
5492	.read		= tracing_read_pipe,
5493	.splice_read	= tracing_splice_read_pipe,
5494	.release	= tracing_release_pipe,
5495	.llseek		= no_llseek,
5496};
5497
5498static const struct file_operations tracing_entries_fops = {
5499	.open		= tracing_open_generic_tr,
5500	.read		= tracing_entries_read,
5501	.write		= tracing_entries_write,
5502	.llseek		= generic_file_llseek,
5503	.release	= tracing_release_generic_tr,
5504};
5505
5506static const struct file_operations tracing_total_entries_fops = {
5507	.open		= tracing_open_generic_tr,
5508	.read		= tracing_total_entries_read,
5509	.llseek		= generic_file_llseek,
5510	.release	= tracing_release_generic_tr,
5511};
5512
5513static const struct file_operations tracing_free_buffer_fops = {
5514	.open		= tracing_open_generic_tr,
5515	.write		= tracing_free_buffer_write,
5516	.release	= tracing_free_buffer_release,
5517};
5518
5519static const struct file_operations tracing_mark_fops = {
5520	.open		= tracing_open_generic_tr,
5521	.write		= tracing_mark_write,
5522	.llseek		= generic_file_llseek,
5523	.release	= tracing_release_generic_tr,
5524};
5525
5526static const struct file_operations trace_clock_fops = {
5527	.open		= tracing_clock_open,
5528	.read		= seq_read,
5529	.llseek		= seq_lseek,
5530	.release	= tracing_single_release_tr,
5531	.write		= tracing_clock_write,
5532};
5533
5534#ifdef CONFIG_TRACER_SNAPSHOT
5535static const struct file_operations snapshot_fops = {
5536	.open		= tracing_snapshot_open,
5537	.read		= seq_read,
5538	.write		= tracing_snapshot_write,
5539	.llseek		= tracing_lseek,
5540	.release	= tracing_snapshot_release,
5541};
5542
5543static const struct file_operations snapshot_raw_fops = {
5544	.open		= snapshot_raw_open,
5545	.read		= tracing_buffers_read,
5546	.release	= tracing_buffers_release,
5547	.splice_read	= tracing_buffers_splice_read,
5548	.llseek		= no_llseek,
5549};
5550
5551#endif /* CONFIG_TRACER_SNAPSHOT */
5552
5553static int tracing_buffers_open(struct inode *inode, struct file *filp)
5554{
5555	struct trace_array *tr = inode->i_private;
5556	struct ftrace_buffer_info *info;
5557	int ret;
5558
5559	if (tracing_disabled)
5560		return -ENODEV;
5561
5562	if (trace_array_get(tr) < 0)
5563		return -ENODEV;
5564
5565	info = kzalloc(sizeof(*info), GFP_KERNEL);
5566	if (!info) {
5567		trace_array_put(tr);
5568		return -ENOMEM;
5569	}
5570
5571	mutex_lock(&trace_types_lock);
5572
5573	info->iter.tr		= tr;
5574	info->iter.cpu_file	= tracing_get_cpu(inode);
5575	info->iter.trace	= tr->current_trace;
5576	info->iter.trace_buffer = &tr->trace_buffer;
5577	info->spare		= NULL;
5578	/* Force reading ring buffer for first read */
5579	info->read		= (unsigned int)-1;
5580
5581	filp->private_data = info;
5582
5583	tr->current_trace->ref++;
5584
5585	mutex_unlock(&trace_types_lock);
5586
5587	ret = nonseekable_open(inode, filp);
5588	if (ret < 0)
5589		trace_array_put(tr);
5590
5591	return ret;
5592}
5593
5594static unsigned int
5595tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5596{
5597	struct ftrace_buffer_info *info = filp->private_data;
5598	struct trace_iterator *iter = &info->iter;
5599
5600	return trace_poll(iter, filp, poll_table);
5601}
5602
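/*
 * Read handler for per-cpu trace_pipe_raw: instead of formatted text,
 * raw ring-buffer pages are copied to user space through a "spare" page
 * that is filled by ring_buffer_read_page().
 */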
5603static ssize_t
5604tracing_buffers_read(struct file *filp, char __user *ubuf,
5605		     size_t count, loff_t *ppos)
5606{
5607	struct ftrace_buffer_info *info = filp->private_data;
5608	struct trace_iterator *iter = &info->iter;
5609	ssize_t ret;
5610	ssize_t size;
5611
5612	if (!count)
5613		return 0;
5614
5615#ifdef CONFIG_TRACER_MAX_TRACE
5616	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5617		return -EBUSY;
5618#endif
5619
5620	if (!info->spare)
5621		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5622							  iter->cpu_file);
5623	if (!info->spare)
5624		return -ENOMEM;
5625
5626	/* Do we have previous read data to read? */
5627	if (info->read < PAGE_SIZE)
5628		goto read;
5629
5630 again:
5631	trace_access_lock(iter->cpu_file);
5632	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5633				    &info->spare,
5634				    count,
5635				    iter->cpu_file, 0);
5636	trace_access_unlock(iter->cpu_file);
5637
5638	if (ret < 0) {
5639		if (trace_empty(iter)) {
5640			if ((filp->f_flags & O_NONBLOCK))
5641				return -EAGAIN;
5642
5643			ret = wait_on_pipe(iter, false);
5644			if (ret)
5645				return ret;
5646
5647			goto again;
5648		}
5649		return 0;
5650	}
5651
5652	info->read = 0;
5653 read:
5654	size = PAGE_SIZE - info->read;
5655	if (size > count)
5656		size = count;
5657
5658	ret = copy_to_user(ubuf, info->spare + info->read, size);
5659	if (ret == size)
5660		return -EFAULT;
5661
5662	size -= ret;
5663
5664	*ppos += size;
5665	info->read += size;
5666
5667	return size;
5668}
5669
5670static int tracing_buffers_release(struct inode *inode, struct file *file)
5671{
5672	struct ftrace_buffer_info *info = file->private_data;
5673	struct trace_iterator *iter = &info->iter;
5674
5675	mutex_lock(&trace_types_lock);
5676
5677	iter->tr->current_trace->ref--;
5678
5679	__trace_array_put(iter->tr);
5680
5681	if (info->spare)
5682		ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5683	kfree(info);
5684
5685	mutex_unlock(&trace_types_lock);
5686
5687	return 0;
5688}
5689
5690struct buffer_ref {
5691	struct ring_buffer	*buffer;
5692	void			*page;
5693	int			ref;
5694};
5695
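/*
 * Each page handed to the pipe carries a struct buffer_ref with a plain
 * reference count: the ->get callback bumps it when the pipe buffer is
 * duplicated, and the page is handed back to the ring buffer only when
 * the last reference is dropped in the release callbacks below.
 */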
5696static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5697				    struct pipe_buffer *buf)
5698{
5699	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5700
5701	if (--ref->ref)
5702		return;
5703
5704	ring_buffer_free_read_page(ref->buffer, ref->page);
5705	kfree(ref);
5706	buf->private = 0;
5707}
5708
5709static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5710				struct pipe_buffer *buf)
5711{
5712	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5713
5714	ref->ref++;
5715}
5716
5717/* Pipe buffer operations for a buffer. */
5718static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5719	.can_merge		= 0,
5720	.confirm		= generic_pipe_buf_confirm,
5721	.release		= buffer_pipe_buf_release,
5722	.steal			= generic_pipe_buf_steal,
5723	.get			= buffer_pipe_buf_get,
5724};
5725
5726/*
5727 * Callback from splice_to_pipe(), if we need to release some pages
5728 * at the end of the spd in case we error'ed out in filling the pipe.
5729 */
5730static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5731{
5732	struct buffer_ref *ref =
5733		(struct buffer_ref *)spd->partial[i].private;
5734
5735	if (--ref->ref)
5736		return;
5737
5738	ring_buffer_free_read_page(ref->buffer, ref->page);
5739	kfree(ref);
5740	spd->partial[i].private = 0;
5741}
5742
5743static ssize_t
5744tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5745			    struct pipe_inode_info *pipe, size_t len,
5746			    unsigned int flags)
5747{
5748	struct ftrace_buffer_info *info = file->private_data;
5749	struct trace_iterator *iter = &info->iter;
5750	struct partial_page partial_def[PIPE_DEF_BUFFERS];
5751	struct page *pages_def[PIPE_DEF_BUFFERS];
5752	struct splice_pipe_desc spd = {
5753		.pages		= pages_def,
5754		.partial	= partial_def,
5755		.nr_pages_max	= PIPE_DEF_BUFFERS,
5756		.flags		= flags,
5757		.ops		= &buffer_pipe_buf_ops,
5758		.spd_release	= buffer_spd_release,
5759	};
5760	struct buffer_ref *ref;
5761	int entries, size, i;
5762	ssize_t ret = 0;
5763
5764#ifdef CONFIG_TRACER_MAX_TRACE
5765	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
5766		return -EBUSY;
5767#endif
5768
	if (*ppos & (PAGE_SIZE - 1))
		return -EINVAL;

	if (len & (PAGE_SIZE - 1)) {
		if (len < PAGE_SIZE)
			return -EINVAL;
		len &= PAGE_MASK;
	}

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;
5780
5781 again:
5782	trace_access_lock(iter->cpu_file);
5783	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5784
5785	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5786		struct page *page;
5787		int r;
5788
5789		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5790		if (!ref) {
5791			ret = -ENOMEM;
5792			break;
5793		}
5794
5795		ref->ref = 1;
5796		ref->buffer = iter->trace_buffer->buffer;
5797		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5798		if (!ref->page) {
5799			ret = -ENOMEM;
5800			kfree(ref);
5801			break;
5802		}
5803
5804		r = ring_buffer_read_page(ref->buffer, &ref->page,
5805					  len, iter->cpu_file, 1);
5806		if (r < 0) {
5807			ring_buffer_free_read_page(ref->buffer, ref->page);
5808			kfree(ref);
5809			break;
5810		}
5811
5812		/*
5813		 * zero out any left over data, this is going to
5814		 * user land.
5815		 */
5816		size = ring_buffer_page_len(ref->page);
5817		if (size < PAGE_SIZE)
5818			memset(ref->page + size, 0, PAGE_SIZE - size);
5819
5820		page = virt_to_page(ref->page);
5821
5822		spd.pages[i] = page;
5823		spd.partial[i].len = PAGE_SIZE;
5824		spd.partial[i].offset = 0;
5825		spd.partial[i].private = (unsigned long)ref;
5826		spd.nr_pages++;
5827		*ppos += PAGE_SIZE;
5828
5829		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5830	}
5831
5832	trace_access_unlock(iter->cpu_file);
5833	spd.nr_pages = i;
5834
	/* did we read anything? */
	if (!spd.nr_pages) {
		if (ret)
			goto out;

		ret = -EAGAIN;
		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
			goto out;

		ret = wait_on_pipe(iter, true);
		if (ret)
			goto out;

		goto again;
	}

	ret = splice_to_pipe(pipe, &spd);
out:
	/* release the grown spd on all exit paths */
	splice_shrink_spd(&spd);

	return ret;
5854}
5855
5856static const struct file_operations tracing_buffers_fops = {
5857	.open		= tracing_buffers_open,
5858	.read		= tracing_buffers_read,
5859	.poll		= tracing_buffers_poll,
5860	.release	= tracing_buffers_release,
5861	.splice_read	= tracing_buffers_splice_read,
5862	.llseek		= no_llseek,
5863};
5864
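/*
 * The per-cpu "stats" file prints one "name: value" line per ring
 * buffer counter, e.g. (output sketch):
 *
 *   entries: 0
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 0
 *   ...
 */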
5865static ssize_t
5866tracing_stats_read(struct file *filp, char __user *ubuf,
5867		   size_t count, loff_t *ppos)
5868{
5869	struct inode *inode = file_inode(filp);
5870	struct trace_array *tr = inode->i_private;
5871	struct trace_buffer *trace_buf = &tr->trace_buffer;
5872	int cpu = tracing_get_cpu(inode);
5873	struct trace_seq *s;
5874	unsigned long cnt;
5875	unsigned long long t;
5876	unsigned long usec_rem;
5877
5878	s = kmalloc(sizeof(*s), GFP_KERNEL);
5879	if (!s)
5880		return -ENOMEM;
5881
5882	trace_seq_init(s);
5883
5884	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5885	trace_seq_printf(s, "entries: %ld\n", cnt);
5886
5887	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5888	trace_seq_printf(s, "overrun: %ld\n", cnt);
5889
5890	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5891	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5892
5893	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5894	trace_seq_printf(s, "bytes: %ld\n", cnt);
5895
5896	if (trace_clocks[tr->clock_id].in_ns) {
5897		/* local or global for trace_clock */
5898		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5899		usec_rem = do_div(t, USEC_PER_SEC);
5900		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5901								t, usec_rem);
5902
5903		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5904		usec_rem = do_div(t, USEC_PER_SEC);
5905		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5906	} else {
5907		/* counter or tsc mode for trace_clock */
5908		trace_seq_printf(s, "oldest event ts: %llu\n",
5909				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5910
5911		trace_seq_printf(s, "now ts: %llu\n",
5912				ring_buffer_time_stamp(trace_buf->buffer, cpu));
5913	}
5914
5915	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5916	trace_seq_printf(s, "dropped events: %ld\n", cnt);
5917
5918	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5919	trace_seq_printf(s, "read events: %ld\n", cnt);
5920
5921	count = simple_read_from_buffer(ubuf, count, ppos,
5922					s->buffer, trace_seq_used(s));
5923
5924	kfree(s);
5925
5926	return count;
5927}
5928
5929static const struct file_operations tracing_stats_fops = {
5930	.open		= tracing_open_generic_tr,
5931	.read		= tracing_stats_read,
5932	.llseek		= generic_file_llseek,
5933	.release	= tracing_release_generic_tr,
5934};
5935
5936#ifdef CONFIG_DYNAMIC_FTRACE
5937
5938int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5939{
5940	return 0;
5941}
5942
5943static ssize_t
5944tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5945		  size_t cnt, loff_t *ppos)
5946{
5947	static char ftrace_dyn_info_buffer[1024];
5948	static DEFINE_MUTEX(dyn_info_mutex);
5949	unsigned long *p = filp->private_data;
5950	char *buf = ftrace_dyn_info_buffer;
5951	int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5952	int r;
5953
5954	mutex_lock(&dyn_info_mutex);
5955	r = sprintf(buf, "%ld ", *p);
5956
5957	r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5958	buf[r++] = '\n';
5959
5960	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5961
5962	mutex_unlock(&dyn_info_mutex);
5963
5964	return r;
5965}
5966
5967static const struct file_operations tracing_dyn_info_fops = {
5968	.open		= tracing_open_generic,
5969	.read		= tracing_read_dyn_info,
5970	.llseek		= generic_file_llseek,
5971};
5972#endif /* CONFIG_DYNAMIC_FTRACE */
5973
5974#if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5975static void
5976ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5977{
5978	tracing_snapshot();
5979}
5980
5981static void
5982ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5983{
	unsigned long *count = (unsigned long *)data;
5985
5986	if (!*count)
5987		return;
5988
5989	if (*count != -1)
5990		(*count)--;
5991
5992	tracing_snapshot();
5993}
5994
5995static int
5996ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5997		      struct ftrace_probe_ops *ops, void *data)
5998{
5999	long count = (long)data;
6000
6001	seq_printf(m, "%ps:", (void *)ip);
6002
6003	seq_puts(m, "snapshot");
6004
6005	if (count == -1)
6006		seq_puts(m, ":unlimited\n");
6007	else
6008		seq_printf(m, ":count=%ld\n", count);
6009
6010	return 0;
6011}
6012
6013static struct ftrace_probe_ops snapshot_probe_ops = {
6014	.func			= ftrace_snapshot,
6015	.print			= ftrace_snapshot_print,
6016};
6017
6018static struct ftrace_probe_ops snapshot_count_probe_ops = {
6019	.func			= ftrace_count_snapshot,
6020	.print			= ftrace_snapshot_print,
6021};
6022
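/*
 * Parse the "snapshot" command written to set_ftrace_filter. Accepted
 * forms (usage sketch):
 *
 *   echo 'func:snapshot' > set_ftrace_filter
 *   echo 'func:snapshot:5' > set_ftrace_filter
 *   echo '!func:snapshot' > set_ftrace_filter
 */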
6023static int
6024ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6025			       char *glob, char *cmd, char *param, int enable)
6026{
6027	struct ftrace_probe_ops *ops;
6028	void *count = (void *)-1;
6029	char *number;
6030	int ret;
6031
6032	/* hash funcs only work with set_ftrace_filter */
6033	if (!enable)
6034		return -EINVAL;
6035
6036	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6037
6038	if (glob[0] == '!') {
6039		unregister_ftrace_function_probe_func(glob+1, ops);
6040		return 0;
6041	}
6042
6043	if (!param)
6044		goto out_reg;
6045
6046	number = strsep(&param, ":");
6047
6048	if (!strlen(number))
6049		goto out_reg;
6050
6051	/*
6052	 * We use the callback data field (which is a pointer)
6053	 * as our counter.
6054	 */
6055	ret = kstrtoul(number, 0, (unsigned long *)&count);
6056	if (ret)
6057		return ret;
6058
6059 out_reg:
6060	ret = register_ftrace_function_probe(glob, ops, count);
6061
6062	if (ret >= 0)
6063		alloc_snapshot(&global_trace);
6064
6065	return ret < 0 ? ret : 0;
6066}
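/*
 * Usage sketch (illustrative): the callback above implements the
 * "snapshot" command registered below, driven through
 * set_ftrace_filter, e.g.:
 *
 *   # echo 'schedule:snapshot' > /sys/kernel/tracing/set_ftrace_filter
 *   # echo 'schedule:snapshot:3' > /sys/kernel/tracing/set_ftrace_filter
 *   # echo '!schedule:snapshot' > /sys/kernel/tracing/set_ftrace_filter
 *
 * The first form snapshots on every hit of schedule(), the second only
 * for the next 3 hits (the ":3" becomes the count parsed above), and
 * the '!' prefix unregisters the probe again. schedule() is just an
 * example; any traceable function works.
 */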
6067
6068static struct ftrace_func_command ftrace_snapshot_cmd = {
6069	.name			= "snapshot",
6070	.func			= ftrace_trace_snapshot_callback,
6071};
6072
6073static __init int register_snapshot_cmd(void)
6074{
6075	return register_ftrace_command(&ftrace_snapshot_cmd);
6076}
6077#else
6078static inline __init int register_snapshot_cmd(void) { return 0; }
6079#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6080
6081static struct dentry *tracing_get_dentry(struct trace_array *tr)
6082{
6083	if (WARN_ON(!tr->dir))
6084		return ERR_PTR(-ENODEV);
6085
6086	/* Top directory uses NULL as the parent */
6087	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6088		return NULL;
6089
6090	/* All sub buffers have a descriptor */
6091	return tr->dir;
6092}
6093
6094static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6095{
6096	struct dentry *d_tracer;
6097
6098	if (tr->percpu_dir)
6099		return tr->percpu_dir;
6100
6101	d_tracer = tracing_get_dentry(tr);
6102	if (IS_ERR(d_tracer))
6103		return NULL;
6104
6105	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6106
6107	WARN_ONCE(!tr->percpu_dir,
6108		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6109
6110	return tr->percpu_dir;
6111}
6112
6113static struct dentry *
6114trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6115		      void *data, long cpu, const struct file_operations *fops)
6116{
6117	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6118
6119	if (ret) /* See tracing_get_cpu() */
6120		d_inode(ret)->i_cdev = (void *)(cpu + 1);
6121	return ret;
6122}
6123
6124static void
6125tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6126{
6127	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6128	struct dentry *d_cpu;
6129	char cpu_dir[30]; /* 30 characters should be more than enough */
6130
6131	if (!d_percpu)
6132		return;
6133
6134	snprintf(cpu_dir, 30, "cpu%ld", cpu);
6135	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6136	if (!d_cpu) {
6137		pr_warning("Could not create tracefs '%s' entry\n", cpu_dir);
6138		return;
6139	}
6140
6141	/* per cpu trace_pipe */
6142	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6143				tr, cpu, &tracing_pipe_fops);
6144
6145	/* per cpu trace */
6146	trace_create_cpu_file("trace", 0644, d_cpu,
6147				tr, cpu, &tracing_fops);
6148
6149	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6150				tr, cpu, &tracing_buffers_fops);
6151
6152	trace_create_cpu_file("stats", 0444, d_cpu,
6153				tr, cpu, &tracing_stats_fops);
6154
6155	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6156				tr, cpu, &tracing_entries_fops);
6157
6158#ifdef CONFIG_TRACER_SNAPSHOT
6159	trace_create_cpu_file("snapshot", 0644, d_cpu,
6160				tr, cpu, &snapshot_fops);
6161
6162	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6163				tr, cpu, &snapshot_raw_fops);
6164#endif
6165}
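/*
 * The calls above produce a per-CPU directory layout along these lines
 * (sketch, assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *   /sys/kernel/tracing/per_cpu/cpu0/trace
 *   /sys/kernel/tracing/per_cpu/cpu0/trace_pipe
 *   /sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw
 *   /sys/kernel/tracing/per_cpu/cpu0/stats
 *   /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb
 *   /sys/kernel/tracing/per_cpu/cpu0/snapshot       (CONFIG_TRACER_SNAPSHOT)
 *   /sys/kernel/tracing/per_cpu/cpu0/snapshot_raw   (CONFIG_TRACER_SNAPSHOT)
 */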
6166
6167#ifdef CONFIG_FTRACE_SELFTEST
6168/* Let selftest have access to static functions in this file */
6169#include "trace_selftest.c"
6170#endif
6171
6172static ssize_t
6173trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6174			loff_t *ppos)
6175{
6176	struct trace_option_dentry *topt = filp->private_data;
6177	char *buf;
6178
6179	if (topt->flags->val & topt->opt->bit)
6180		buf = "1\n";
6181	else
6182		buf = "0\n";
6183
6184	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6185}
6186
6187static ssize_t
6188trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6189			 loff_t *ppos)
6190{
6191	struct trace_option_dentry *topt = filp->private_data;
6192	unsigned long val;
6193	int ret;
6194
6195	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6196	if (ret)
6197		return ret;
6198
6199	if (val != 0 && val != 1)
6200		return -EINVAL;
6201
6202	if (!!(topt->flags->val & topt->opt->bit) != val) {
6203		mutex_lock(&trace_types_lock);
6204		ret = __set_tracer_option(topt->tr, topt->flags,
6205					  topt->opt, !val);
6206		mutex_unlock(&trace_types_lock);
6207		if (ret)
6208			return ret;
6209	}
6210
6211	*ppos += cnt;
6212
6213	return cnt;
6214}
6215
6216
6217static const struct file_operations trace_options_fops = {
6218	.open = tracing_open_generic,
6219	.read = trace_options_read,
6220	.write = trace_options_write,
6221	.llseek	= generic_file_llseek,
6222};
6223
6224/*
6225 * In order to pass in both the trace_array descriptor as well as the index
6226 * to the flag that the trace option file represents, the trace_array
6227 * has a character array of trace_flags_index[], which holds the index
6228 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6229 * The address of this character array is passed to the flag option file
6230 * read/write callbacks.
6231 *
6232 * In order to extract both the index and the trace_array descriptor,
6233 * get_tr_index() uses the following algorithm.
6234 *
6235 *   idx = *ptr;
6236 *
6237 * The pointer itself is the address of one byte of the index array,
6238 * and that byte's value is its own position (remember index[1] == 1).
6239 *
6240 * Then, to get the trace_array descriptor, subtract that index from
6241 * the pointer to get back to the start of the array:
6242 *
6243 *   ptr - idx == &index[0]
6244 *
6245 * Then a simple container_of() from that pointer gets us to the
6246 * trace_array descriptor.
6247 */
6248static void get_tr_index(void *data, struct trace_array **ptr,
6249			 unsigned int *pindex)
6250{
6251	*pindex = *(unsigned char *)data;
6252
6253	*ptr = container_of(data - *pindex, struct trace_array,
6254			    trace_flags_index);
6255}
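/*
 * A worked example of the trick above (sketch only): if data points at
 * tr->trace_flags_index[5], then *data == 5, so walking back 5 bytes
 * lands on &tr->trace_flags_index[0] and container_of() recovers the
 * trace_array:
 *
 *   unsigned char *p = &tr->trace_flags_index[5];   /* passed as data */
 *   unsigned int idx = *p;                          /* == 5 */
 *   struct trace_array *tr2 =
 *           container_of((void *)(p - idx), struct trace_array,
 *                        trace_flags_index);        /* tr2 == tr */
 */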
6256
6257static ssize_t
6258trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6259			loff_t *ppos)
6260{
6261	void *tr_index = filp->private_data;
6262	struct trace_array *tr;
6263	unsigned int index;
6264	char *buf;
6265
6266	get_tr_index(tr_index, &tr, &index);
6267
6268	if (tr->trace_flags & (1 << index))
6269		buf = "1\n";
6270	else
6271		buf = "0\n";
6272
6273	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6274}
6275
6276static ssize_t
6277trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6278			 loff_t *ppos)
6279{
6280	void *tr_index = filp->private_data;
6281	struct trace_array *tr;
6282	unsigned int index;
6283	unsigned long val;
6284	int ret;
6285
6286	get_tr_index(tr_index, &tr, &index);
6287
6288	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6289	if (ret)
6290		return ret;
6291
6292	if (val != 0 && val != 1)
6293		return -EINVAL;
6294
6295	mutex_lock(&trace_types_lock);
6296	ret = set_tracer_flag(tr, 1 << index, val);
6297	mutex_unlock(&trace_types_lock);
6298
6299	if (ret < 0)
6300		return ret;
6301
6302	*ppos += cnt;
6303
6304	return cnt;
6305}
6306
6307static const struct file_operations trace_options_core_fops = {
6308	.open = tracing_open_generic,
6309	.read = trace_options_core_read,
6310	.write = trace_options_core_write,
6311	.llseek = generic_file_llseek,
6312};
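/*
 * Usage sketch (illustrative): the core trace flags exposed through
 * trace_options_core_fops live in the options/ directory and only
 * accept '0' or '1' (anything else returns -EINVAL above), e.g.:
 *
 *   # echo 1 > /sys/kernel/tracing/options/sym-addr
 *   # cat /sys/kernel/tracing/options/sym-addr
 *   1
 *
 * "sym-addr" is just one example; the file names follow the
 * trace_options[] strings used by create_trace_options_dir() below.
 */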
6313
6314struct dentry *trace_create_file(const char *name,
6315				 umode_t mode,
6316				 struct dentry *parent,
6317				 void *data,
6318				 const struct file_operations *fops)
6319{
6320	struct dentry *ret;
6321
6322	ret = tracefs_create_file(name, mode, parent, data, fops);
6323	if (!ret)
6324		pr_warning("Could not create tracefs '%s' entry\n", name);
6325
6326	return ret;
6327}
6328
6329
6330static struct dentry *trace_options_init_dentry(struct trace_array *tr)
6331{
6332	struct dentry *d_tracer;
6333
6334	if (tr->options)
6335		return tr->options;
6336
6337	d_tracer = tracing_get_dentry(tr);
6338	if (IS_ERR(d_tracer))
6339		return NULL;
6340
6341	tr->options = tracefs_create_dir("options", d_tracer);
6342	if (!tr->options) {
6343		pr_warning("Could not create tracefs directory 'options'\n");
6344		return NULL;
6345	}
6346
6347	return tr->options;
6348}
6349
6350static void
6351create_trace_option_file(struct trace_array *tr,
6352			 struct trace_option_dentry *topt,
6353			 struct tracer_flags *flags,
6354			 struct tracer_opt *opt)
6355{
6356	struct dentry *t_options;
6357
6358	t_options = trace_options_init_dentry(tr);
6359	if (!t_options)
6360		return;
6361
6362	topt->flags = flags;
6363	topt->opt = opt;
6364	topt->tr = tr;
6365
6366	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6367				    &trace_options_fops);
6368
6369}
6370
6371static void
6372create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6373{
6374	struct trace_option_dentry *topts;
6375	struct trace_options *tr_topts;
6376	struct tracer_flags *flags;
6377	struct tracer_opt *opts;
6378	int cnt;
6379	int i;
6380
6381	if (!tracer)
6382		return;
6383
6384	flags = tracer->flags;
6385
6386	if (!flags || !flags->opts)
6387		return;
6388
6389	/*
6390	 * If this is an instance, only create flags for tracers
6391	 * the instance may have.
6392	 */
6393	if (!trace_ok_for_array(tracer, tr))
6394		return;
6395
6396	for (i = 0; i < tr->nr_topts; i++) {
6397		/*
6398		 * Check if these flags have already been added.
6399		 * Some tracers share flags.
6400		 */
6401		if (tr->topts[i].tracer->flags == tracer->flags)
6402			return;
6403	}
6404
6405	opts = flags->opts;
6406
6407	for (cnt = 0; opts[cnt].name; cnt++)
6408		;
6409
6410	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6411	if (!topts)
6412		return;
6413
6414	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
6415			    GFP_KERNEL);
6416	if (!tr_topts) {
6417		kfree(topts);
6418		return;
6419	}
6420
6421	tr->topts = tr_topts;
6422	tr->topts[tr->nr_topts].tracer = tracer;
6423	tr->topts[tr->nr_topts].topts = topts;
6424	tr->nr_topts++;
6425
6426	for (cnt = 0; opts[cnt].name; cnt++) {
6427		create_trace_option_file(tr, &topts[cnt], flags,
6428					 &opts[cnt]);
6429		WARN_ONCE(topts[cnt].entry == NULL,
6430			  "Failed to create trace option: %s",
6431			  opts[cnt].name);
6432	}
6433}
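/*
 * Example (illustrative): when a tracer with its own tracer_flags is
 * added, the loop above creates one file per option under options/,
 * e.g. options/func_stack_trace for the function tracer. Writing '0'
 * or '1' to such a file goes through trace_options_write() and
 * __set_tracer_option() above.
 */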
6434
6435static struct dentry *
6436create_trace_option_core_file(struct trace_array *tr,
6437			      const char *option, long index)
6438{
6439	struct dentry *t_options;
6440
6441	t_options = trace_options_init_dentry(tr);
6442	if (!t_options)
6443		return NULL;
6444
6445	return trace_create_file(option, 0644, t_options,
6446				 (void *)&tr->trace_flags_index[index],
6447				 &trace_options_core_fops);
6448}
6449
6450static void create_trace_options_dir(struct trace_array *tr)
6451{
6452	struct dentry *t_options;
6453	bool top_level = tr == &global_trace;
6454	int i;
6455
6456	t_options = trace_options_init_dentry(tr);
6457	if (!t_options)
6458		return;
6459
6460	for (i = 0; trace_options[i]; i++) {
6461		if (top_level ||
6462		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
6463			create_trace_option_core_file(tr, trace_options[i], i);
6464	}
6465}
6466
6467static ssize_t
6468rb_simple_read(struct file *filp, char __user *ubuf,
6469	       size_t cnt, loff_t *ppos)
6470{
6471	struct trace_array *tr = filp->private_data;
6472	char buf[64];
6473	int r;
6474
6475	r = tracer_tracing_is_on(tr);
6476	r = sprintf(buf, "%d\n", r);
6477
6478	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6479}
6480
6481static ssize_t
6482rb_simple_write(struct file *filp, const char __user *ubuf,
6483		size_t cnt, loff_t *ppos)
6484{
6485	struct trace_array *tr = filp->private_data;
6486	struct ring_buffer *buffer = tr->trace_buffer.buffer;
6487	unsigned long val;
6488	int ret;
6489
6490	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6491	if (ret)
6492		return ret;
6493
6494	if (buffer) {
6495		mutex_lock(&trace_types_lock);
6496		if (val) {
6497			tracer_tracing_on(tr);
6498			if (tr->current_trace->start)
6499				tr->current_trace->start(tr);
6500		} else {
6501			tracer_tracing_off(tr);
6502			if (tr->current_trace->stop)
6503				tr->current_trace->stop(tr);
6504		}
6505		mutex_unlock(&trace_types_lock);
6506	}
6507
6508	(*ppos)++;
6509
6510	return cnt;
6511}
6512
6513static const struct file_operations rb_simple_fops = {
6514	.open		= tracing_open_generic_tr,
6515	.read		= rb_simple_read,
6516	.write		= rb_simple_write,
6517	.release	= tracing_release_generic_tr,
6518	.llseek		= default_llseek,
6519};
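/*
 * Usage sketch (illustrative): rb_simple_fops backs the "tracing_on"
 * file created in init_tracer_tracefs() below. Writing to it flips the
 * ring buffer on or off and calls the current tracer's start/stop
 * hooks, assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   # echo 0 > /sys/kernel/tracing/tracing_on    # pause recording
 *   # cat /sys/kernel/tracing/tracing_on         # -> 0
 *   # echo 1 > /sys/kernel/tracing/tracing_on    # resume recording
 */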
6520
6521struct dentry *trace_instance_dir;
6522
6523static void
6524init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
6525
6526static int
6527allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6528{
6529	enum ring_buffer_flags rb_flags;
6530
6531	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6532
6533	buf->tr = tr;
6534
6535	buf->buffer = ring_buffer_alloc(size, rb_flags);
6536	if (!buf->buffer)
6537		return -ENOMEM;
6538
6539	buf->data = alloc_percpu(struct trace_array_cpu);
6540	if (!buf->data) {
6541		ring_buffer_free(buf->buffer);
6542		return -ENOMEM;
6543	}
6544
6545	/* Allocate the first page for all buffers */
6546	set_buffer_entries(&tr->trace_buffer,
6547			   ring_buffer_size(tr->trace_buffer.buffer, 0));
6548
6549	return 0;
6550}
6551
6552static int allocate_trace_buffers(struct trace_array *tr, int size)
6553{
6554	int ret;
6555
6556	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6557	if (ret)
6558		return ret;
6559
6560#ifdef CONFIG_TRACER_MAX_TRACE
6561	ret = allocate_trace_buffer(tr, &tr->max_buffer,
6562				    allocate_snapshot ? size : 1);
6563	if (WARN_ON(ret)) {
6564		ring_buffer_free(tr->trace_buffer.buffer);
6565		free_percpu(tr->trace_buffer.data);
6566		return -ENOMEM;
6567	}
6568	tr->allocated_snapshot = allocate_snapshot;
6569
6570	/*
6571	 * Only the top level trace array gets its snapshot allocated
6572	 * from the kernel command line.
6573	 */
6574	allocate_snapshot = false;
6575#endif
6576	return 0;
6577}
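/*
 * Note (sketch): with CONFIG_TRACER_MAX_TRACE the max/snapshot buffer
 * above is normally allocated with a token size of 1 and only expanded
 * when a snapshot is actually taken. Booting with the "alloc_snapshot"
 * parameter sets allocate_snapshot, so the top level max_buffer is
 * sized like the main buffer from the start:
 *
 *   linux ... alloc_snapshot
 */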
6578
6579static void free_trace_buffer(struct trace_buffer *buf)
6580{
6581	if (buf->buffer) {
6582		ring_buffer_free(buf->buffer);
6583		buf->buffer = NULL;
6584		free_percpu(buf->data);
6585		buf->data = NULL;
6586	}
6587}
6588
6589static void free_trace_buffers(struct trace_array *tr)
6590{
6591	if (!tr)
6592		return;
6593
6594	free_trace_buffer(&tr->trace_buffer);
6595
6596#ifdef CONFIG_TRACER_MAX_TRACE
6597	free_trace_buffer(&tr->max_buffer);
6598#endif
6599}
6600
6601static void init_trace_flags_index(struct trace_array *tr)
6602{
6603	int i;
6604
6605	/* Used by the trace options files */
6606	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
6607		tr->trace_flags_index[i] = i;
6608}
6609
6610static void __update_tracer_options(struct trace_array *tr)
6611{
6612	struct tracer *t;
6613
6614	for (t = trace_types; t; t = t->next)
6615		add_tracer_options(tr, t);
6616}
6617
6618static void update_tracer_options(struct trace_array *tr)
6619{
6620	mutex_lock(&trace_types_lock);
6621	__update_tracer_options(tr);
6622	mutex_unlock(&trace_types_lock);
6623}
6624
6625static int instance_mkdir(const char *name)
6626{
6627	struct trace_array *tr;
6628	int ret;
6629
6630	mutex_lock(&trace_types_lock);
6631
6632	ret = -EEXIST;
6633	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6634		if (tr->name && strcmp(tr->name, name) == 0)
6635			goto out_unlock;
6636	}
6637
6638	ret = -ENOMEM;
6639	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6640	if (!tr)
6641		goto out_unlock;
6642
6643	tr->name = kstrdup(name, GFP_KERNEL);
6644	if (!tr->name)
6645		goto out_free_tr;
6646
6647	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6648		goto out_free_tr;
6649
6650	tr->trace_flags = global_trace.trace_flags;
6651
6652	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6653
6654	raw_spin_lock_init(&tr->start_lock);
6655
6656	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6657
6658	tr->current_trace = &nop_trace;
6659
6660	INIT_LIST_HEAD(&tr->systems);
6661	INIT_LIST_HEAD(&tr->events);
6662
6663	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6664		goto out_free_tr;
6665
6666	tr->dir = tracefs_create_dir(name, trace_instance_dir);
6667	if (!tr->dir)
6668		goto out_free_tr;
6669
6670	ret = event_trace_add_tracer(tr->dir, tr);
6671	if (ret) {
6672		tracefs_remove_recursive(tr->dir);
6673		goto out_free_tr;
6674	}
6675
6676	init_tracer_tracefs(tr, tr->dir);
6677	init_trace_flags_index(tr);
6678	__update_tracer_options(tr);
6679
6680	list_add(&tr->list, &ftrace_trace_arrays);
6681
6682	mutex_unlock(&trace_types_lock);
6683
6684	return 0;
6685
6686 out_free_tr:
6687	free_trace_buffers(tr);
6688	free_cpumask_var(tr->tracing_cpumask);
6689	kfree(tr->name);
6690	kfree(tr);
6691
6692 out_unlock:
6693	mutex_unlock(&trace_types_lock);
6694
6695	return ret;
6696
6697}
6698
6699static int instance_rmdir(const char *name)
6700{
6701	struct trace_array *tr;
6702	int found = 0;
6703	int ret;
6704	int i;
6705
6706	mutex_lock(&trace_types_lock);
6707
6708	ret = -ENODEV;
6709	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6710		if (tr->name && strcmp(tr->name, name) == 0) {
6711			found = 1;
6712			break;
6713		}
6714	}
6715	if (!found)
6716		goto out_unlock;
6717
6718	ret = -EBUSY;
6719	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
6720		goto out_unlock;
6721
6722	list_del(&tr->list);
6723
6724	tracing_set_nop(tr);
6725	event_trace_del_tracer(tr);
6726	ftrace_destroy_function_files(tr);
6727	tracefs_remove_recursive(tr->dir);
6728	free_trace_buffers(tr);
6729
6730	for (i = 0; i < tr->nr_topts; i++) {
6731		kfree(tr->topts[i].topts);
6732	}
6733	kfree(tr->topts);
6734
6735	kfree(tr->name);
6736	kfree(tr);
6737
6738	ret = 0;
6739
6740 out_unlock:
6741	mutex_unlock(&trace_types_lock);
6742
6743	return ret;
6744}
6745
6746static __init void create_trace_instances(struct dentry *d_tracer)
6747{
6748	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
6749							 instance_mkdir,
6750							 instance_rmdir);
6751	if (WARN_ON(!trace_instance_dir))
6752		return;
6753}
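/*
 * Usage sketch (illustrative): instances are created and removed from
 * user space with plain mkdir/rmdir, which tracefs routes to
 * instance_mkdir()/instance_rmdir() above:
 *
 *   # mkdir /sys/kernel/tracing/instances/foo
 *   # echo function > /sys/kernel/tracing/instances/foo/current_tracer
 *   # rmdir /sys/kernel/tracing/instances/foo
 *
 * The rmdir fails with -EBUSY while the instance still has references,
 * as checked in instance_rmdir(). "foo" is an arbitrary example name.
 */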
6754
6755static void
6756init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
6757{
6758	int cpu;
6759
6760	trace_create_file("available_tracers", 0444, d_tracer,
6761			tr, &show_traces_fops);
6762
6763	trace_create_file("current_tracer", 0644, d_tracer,
6764			tr, &set_tracer_fops);
6765
6766	trace_create_file("tracing_cpumask", 0644, d_tracer,
6767			  tr, &tracing_cpumask_fops);
6768
6769	trace_create_file("trace_options", 0644, d_tracer,
6770			  tr, &tracing_iter_fops);
6771
6772	trace_create_file("trace", 0644, d_tracer,
6773			  tr, &tracing_fops);
6774
6775	trace_create_file("trace_pipe", 0444, d_tracer,
6776			  tr, &tracing_pipe_fops);
6777
6778	trace_create_file("buffer_size_kb", 0644, d_tracer,
6779			  tr, &tracing_entries_fops);
6780
6781	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6782			  tr, &tracing_total_entries_fops);
6783
6784	trace_create_file("free_buffer", 0200, d_tracer,
6785			  tr, &tracing_free_buffer_fops);
6786
6787	trace_create_file("trace_marker", 0220, d_tracer,
6788			  tr, &tracing_mark_fops);
6789
6790	trace_create_file("trace_clock", 0644, d_tracer, tr,
6791			  &trace_clock_fops);
6792
6793	trace_create_file("tracing_on", 0644, d_tracer,
6794			  tr, &rb_simple_fops);
6795
6796	create_trace_options_dir(tr);
6797
6798#ifdef CONFIG_TRACER_MAX_TRACE
6799	trace_create_file("tracing_max_latency", 0644, d_tracer,
6800			&tr->max_latency, &tracing_max_lat_fops);
6801#endif
6802
6803	if (ftrace_create_function_files(tr, d_tracer))
6804		WARN(1, "Could not allocate function filter files");
6805
6806#ifdef CONFIG_TRACER_SNAPSHOT
6807	trace_create_file("snapshot", 0644, d_tracer,
6808			  tr, &snapshot_fops);
6809#endif
6810
6811	for_each_tracing_cpu(cpu)
6812		tracing_init_tracefs_percpu(tr, cpu);
6813
6814}
6815
6816static struct vfsmount *trace_automount(void *ignore)
6817{
6818	struct vfsmount *mnt;
6819	struct file_system_type *type;
6820
6821	/*
6822	 * To maintain backward compatibility for tools that mount
6823	 * debugfs to get to the tracing facility, tracefs is automatically
6824	 * mounted to the debugfs/tracing directory.
6825	 */
6826	type = get_fs_type("tracefs");
6827	if (!type)
6828		return NULL;
6829	mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
6830	put_filesystem(type);
6831	if (IS_ERR(mnt))
6832		return NULL;
6833	mntget(mnt);
6834
6835	return mnt;
6836}
6837
6838/**
6839 * tracing_init_dentry - initialize top level trace array
6840 *
6841 * This is called when creating files or directories in the tracing
6842 * directory. It is called via fs_initcall() by any of the boot up code
6843 * and expects to return the dentry of the top level tracing directory.
6844 */
6845struct dentry *tracing_init_dentry(void)
6846{
6847	struct trace_array *tr = &global_trace;
6848
6849	/* The top level trace array uses NULL as parent */
6850	if (tr->dir)
6851		return NULL;
6852
6853	if (WARN_ON(!tracefs_initialized()) ||
6854		(IS_ENABLED(CONFIG_DEBUG_FS) &&
6855		 WARN_ON(!debugfs_initialized())))
6856		return ERR_PTR(-ENODEV);
6857
6858	/*
6859	 * As there may still be users that expect the tracing
6860	 * files to exist in debugfs/tracing, we must automount
6861	 * the tracefs file system there, so older tools still
6862	 * work with the newer kernel.
6863	 */
6864	tr->dir = debugfs_create_automount("tracing", NULL,
6865					   trace_automount, NULL);
6866	if (!tr->dir) {
6867		pr_warn_once("Could not create debugfs directory 'tracing'\n");
6868		return ERR_PTR(-ENOMEM);
6869	}
6870
6871	return NULL;
6872}
6873
6874extern struct trace_enum_map *__start_ftrace_enum_maps[];
6875extern struct trace_enum_map *__stop_ftrace_enum_maps[];
6876
6877static void __init trace_enum_init(void)
6878{
6879	int len;
6880
6881	len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
6882	trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
6883}
6884
6885#ifdef CONFIG_MODULES
6886static void trace_module_add_enums(struct module *mod)
6887{
6888	if (!mod->num_trace_enums)
6889		return;
6890
6891	/*
6892	 * Modules with bad taint do not have events created; do
6893	 * not bother with enums either.
6894	 */
6895	if (trace_module_has_bad_taint(mod))
6896		return;
6897
6898	trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
6899}
6900
6901#ifdef CONFIG_TRACE_ENUM_MAP_FILE
6902static void trace_module_remove_enums(struct module *mod)
6903{
6904	union trace_enum_map_item *map;
6905	union trace_enum_map_item **last = &trace_enum_maps;
6906
6907	if (!mod->num_trace_enums)
6908		return;
6909
6910	mutex_lock(&trace_enum_mutex);
6911
6912	map = trace_enum_maps;
6913
6914	while (map) {
6915		if (map->head.mod == mod)
6916			break;
6917		map = trace_enum_jmp_to_tail(map);
6918		last = &map->tail.next;
6919		map = map->tail.next;
6920	}
6921	if (!map)
6922		goto out;
6923
6924	*last = trace_enum_jmp_to_tail(map)->tail.next;
6925	kfree(map);
6926 out:
6927	mutex_unlock(&trace_enum_mutex);
6928}
6929#else
6930static inline void trace_module_remove_enums(struct module *mod) { }
6931#endif /* CONFIG_TRACE_ENUM_MAP_FILE */
6932
6933static int trace_module_notify(struct notifier_block *self,
6934			       unsigned long val, void *data)
6935{
6936	struct module *mod = data;
6937
6938	switch (val) {
6939	case MODULE_STATE_COMING:
6940		trace_module_add_enums(mod);
6941		break;
6942	case MODULE_STATE_GOING:
6943		trace_module_remove_enums(mod);
6944		break;
6945	}
6946
6947	return 0;
6948}
6949
6950static struct notifier_block trace_module_nb = {
6951	.notifier_call = trace_module_notify,
6952	.priority = 0,
6953};
6954#endif /* CONFIG_MODULES */
6955
6956static __init int tracer_init_tracefs(void)
6957{
6958	struct dentry *d_tracer;
6959
6960	trace_access_lock_init();
6961
6962	d_tracer = tracing_init_dentry();
6963	if (IS_ERR(d_tracer))
6964		return 0;
6965
6966	init_tracer_tracefs(&global_trace, d_tracer);
6967
6968	trace_create_file("tracing_thresh", 0644, d_tracer,
6969			&global_trace, &tracing_thresh_fops);
6970
6971	trace_create_file("README", 0444, d_tracer,
6972			NULL, &tracing_readme_fops);
6973
6974	trace_create_file("saved_cmdlines", 0444, d_tracer,
6975			NULL, &tracing_saved_cmdlines_fops);
6976
6977	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
6978			  NULL, &tracing_saved_cmdlines_size_fops);
6979
6980	trace_enum_init();
6981
6982	trace_create_enum_file(d_tracer);
6983
6984#ifdef CONFIG_MODULES
6985	register_module_notifier(&trace_module_nb);
6986#endif
6987
6988#ifdef CONFIG_DYNAMIC_FTRACE
6989	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6990			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6991#endif
6992
6993	create_trace_instances(d_tracer);
6994
6995	update_tracer_options(&global_trace);
6996
6997	return 0;
6998}
6999
7000static int trace_panic_handler(struct notifier_block *this,
7001			       unsigned long event, void *unused)
7002{
7003	if (ftrace_dump_on_oops)
7004		ftrace_dump(ftrace_dump_on_oops);
7005	return NOTIFY_OK;
7006}
7007
7008static struct notifier_block trace_panic_notifier = {
7009	.notifier_call  = trace_panic_handler,
7010	.next           = NULL,
7011	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
7012};
7013
7014static int trace_die_handler(struct notifier_block *self,
7015			     unsigned long val,
7016			     void *data)
7017{
7018	switch (val) {
7019	case DIE_OOPS:
7020		if (ftrace_dump_on_oops)
7021			ftrace_dump(ftrace_dump_on_oops);
7022		break;
7023	default:
7024		break;
7025	}
7026	return NOTIFY_OK;
7027}
7028
7029static struct notifier_block trace_die_notifier = {
7030	.notifier_call = trace_die_handler,
7031	.priority = 200
7032};
7033
7034/*
7035 * printk is limited to 1024 bytes; we really don't need it that big.
7036 * Nothing should be printing 1000 characters anyway.
7037 */
7038#define TRACE_MAX_PRINT		1000
7039
7040/*
7041 * Define here KERN_TRACE so that we have one place to modify
7042 * it if we decide to change what log level the ftrace dump
7043 * should be at.
7044 */
7045#define KERN_TRACE		KERN_EMERG
7046
7047void
7048trace_printk_seq(struct trace_seq *s)
7049{
7050	/* Probably should print a warning here. */
7051	if (s->seq.len >= TRACE_MAX_PRINT)
7052		s->seq.len = TRACE_MAX_PRINT;
7053
7054	/*
7055	 * More paranoid code. Although the buffer size is set to
7056	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7057	 * an extra layer of protection.
7058	 */
7059	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7060		s->seq.len = s->seq.size - 1;
7061
7062	/* should be nul terminated, but we are paranoid. */
7063	s->buffer[s->seq.len] = 0;
7064
7065	printk(KERN_TRACE "%s", s->buffer);
7066
7067	trace_seq_init(s);
7068}
7069
7070void trace_init_global_iter(struct trace_iterator *iter)
7071{
7072	iter->tr = &global_trace;
7073	iter->trace = iter->tr->current_trace;
7074	iter->cpu_file = RING_BUFFER_ALL_CPUS;
7075	iter->trace_buffer = &global_trace.trace_buffer;
7076
7077	if (iter->trace && iter->trace->open)
7078		iter->trace->open(iter);
7079
7080	/* Annotate start of buffers if we had overruns */
7081	if (ring_buffer_overruns(iter->trace_buffer->buffer))
7082		iter->iter_flags |= TRACE_FILE_ANNOTATE;
7083
7084	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
7085	if (trace_clocks[iter->tr->clock_id].in_ns)
7086		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7087}
7088
7089void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7090{
7091	/* use static because iter can be a bit big for the stack */
7092	static struct trace_iterator iter;
7093	static atomic_t dump_running;
7094	struct trace_array *tr = &global_trace;
7095	unsigned int old_userobj;
7096	unsigned long flags;
7097	int cnt = 0, cpu;
7098
7099	/* Only allow one dump user at a time. */
7100	if (atomic_inc_return(&dump_running) != 1) {
7101		atomic_dec(&dump_running);
7102		return;
7103	}
7104
7105	/*
7106	 * Always turn off tracing when we dump.
7107	 * We don't need to show trace output of what happens
7108	 * between multiple crashes.
7109	 *
7110	 * If the user does a sysrq-z, then they can re-enable
7111	 * tracing with echo 1 > tracing_on.
7112	 */
7113	tracing_off();
7114
7115	local_irq_save(flags);
7116
7117	/* Simulate the iterator */
7118	trace_init_global_iter(&iter);
7119
7120	for_each_tracing_cpu(cpu) {
7121		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7122	}
7123
7124	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7125
7126	/* don't look at user memory in panic mode */
7127	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7128
7129	switch (oops_dump_mode) {
7130	case DUMP_ALL:
7131		iter.cpu_file = RING_BUFFER_ALL_CPUS;
7132		break;
7133	case DUMP_ORIG:
7134		iter.cpu_file = raw_smp_processor_id();
7135		break;
7136	case DUMP_NONE:
7137		goto out_enable;
7138	default:
7139		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7140		iter.cpu_file = RING_BUFFER_ALL_CPUS;
7141	}
7142
7143	printk(KERN_TRACE "Dumping ftrace buffer:\n");
7144
7145	/* Did function tracer already get disabled? */
7146	if (ftrace_is_dead()) {
7147		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7148		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7149	}
7150
7151	/*
7152	 * We need to stop all tracing on all CPUs to read
7153	 * the next buffer. This is a bit expensive, but is
7154	 * not done often. We read everything we can,
7155	 * and then release the locks again.
7156	 */
7157
7158	while (!trace_empty(&iter)) {
7159
7160		if (!cnt)
7161			printk(KERN_TRACE "---------------------------------\n");
7162
7163		cnt++;
7164
7165		/* reset all but tr, trace, and overruns */
7166		memset(&iter.seq, 0,
7167		       sizeof(struct trace_iterator) -
7168		       offsetof(struct trace_iterator, seq));
7169		iter.iter_flags |= TRACE_FILE_LAT_FMT;
7170		iter.pos = -1;
7171
7172		if (trace_find_next_entry_inc(&iter) != NULL) {
7173			int ret;
7174
7175			ret = print_trace_line(&iter);
7176			if (ret != TRACE_TYPE_NO_CONSUME)
7177				trace_consume(&iter);
7178		}
7179		touch_nmi_watchdog();
7180
7181		trace_printk_seq(&iter.seq);
7182	}
7183
7184	if (!cnt)
7185		printk(KERN_TRACE "   (ftrace buffer empty)\n");
7186	else
7187		printk(KERN_TRACE "---------------------------------\n");
7188
7189 out_enable:
7190	tr->trace_flags |= old_userobj;
7191
7192	for_each_tracing_cpu(cpu) {
7193		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7194	}
7195	atomic_dec(&dump_running);
7196	local_irq_restore(flags);
7197}
7198EXPORT_SYMBOL_GPL(ftrace_dump);
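/*
 * Usage sketch (illustrative): besides the panic and die notifiers
 * above, ftrace_dump() can be triggered by hand via the sysrq-z
 * handler mentioned in the comment inside the function, e.g.:
 *
 *   # echo 1 > /proc/sys/kernel/sysrq
 *   # echo z > /proc/sysrq-trigger
 *
 * The buffer contents land on the console, and tracing stays off
 * afterwards until re-enabled with "echo 1 > tracing_on".
 */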
7199
7200__init static int tracer_alloc_buffers(void)
7201{
7202	int ring_buf_size;
7203	int ret = -ENOMEM;
7204
7205	/*
7206	 * Make sure we don't accidentally add more trace options
7207	 * than we have bits for.
7208	 */
7209	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7210
7211	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7212		goto out;
7213
7214	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7215		goto out_free_buffer_mask;
7216
7217	/* Only allocate trace_printk buffers if a trace_printk exists */
7218	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7219		/* Must be called before global_trace.buffer is allocated */
7220		trace_printk_init_buffers();
7221
7222	/* To save memory, keep the ring buffer size to its minimum */
7223	if (ring_buffer_expanded)
7224		ring_buf_size = trace_buf_size;
7225	else
7226		ring_buf_size = 1;
7227
7228	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7229	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7230
7231	raw_spin_lock_init(&global_trace.start_lock);
7232
7233	/* Used for event triggers */
7234	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7235	if (!temp_buffer)
7236		goto out_free_cpumask;
7237
7238	if (trace_create_savedcmd() < 0)
7239		goto out_free_temp_buffer;
7240
7241	/* TODO: make the number of buffers hot pluggable with CPUs */
7242	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7243		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7244		WARN_ON(1);
7245		goto out_free_savedcmd;
7246	}
7247
7248	if (global_trace.buffer_disabled)
7249		tracing_off();
7250
7251	if (trace_boot_clock) {
7252		ret = tracing_set_clock(&global_trace, trace_boot_clock);
7253		if (ret < 0)
7254			pr_warning("Trace clock %s not defined, going back to default\n",
7255				   trace_boot_clock);
7256	}
7257
7258	/*
7259	 * register_tracer() might reference current_trace, so it
7260	 * needs to be set before we register anything. This is
7261	 * just a bootstrap of current_trace anyway.
7262	 */
7263	global_trace.current_trace = &nop_trace;
7264
7265	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7266
7267	ftrace_init_global_array_ops(&global_trace);
7268
7269	init_trace_flags_index(&global_trace);
7270
7271	register_tracer(&nop_trace);
7272
7273	/* All seems OK, enable tracing */
7274	tracing_disabled = 0;
7275
7276	atomic_notifier_chain_register(&panic_notifier_list,
7277				       &trace_panic_notifier);
7278
7279	register_die_notifier(&trace_die_notifier);
7280
7281	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7282
7283	INIT_LIST_HEAD(&global_trace.systems);
7284	INIT_LIST_HEAD(&global_trace.events);
7285	list_add(&global_trace.list, &ftrace_trace_arrays);
7286
7287	apply_trace_boot_options();
7288
7289	register_snapshot_cmd();
7290
7291	return 0;
7292
7293out_free_savedcmd:
7294	free_saved_cmdlines_buffer(savedcmd);
7295out_free_temp_buffer:
7296	ring_buffer_free(temp_buffer);
7297out_free_cpumask:
7298	free_cpumask_var(global_trace.tracing_cpumask);
7299out_free_buffer_mask:
7300	free_cpumask_var(tracing_buffer_mask);
7301out:
7302	return ret;
7303}
7304
7305void __init trace_init(void)
7306{
7307	if (tracepoint_printk) {
7308		tracepoint_print_iter =
7309			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
7310		if (WARN_ON(!tracepoint_print_iter))
7311			tracepoint_printk = 0;
7312	}
7313	tracer_alloc_buffers();
7314	trace_event_init();
7315}
7316
7317__init static int clear_boot_tracer(void)
7318{
7319	/*
7320	 * The default bootup tracer name lives in an init section that
7321	 * will be freed. This function is called at late_initcall time;
7322	 * if the boot tracer was never registered, clear the pointer so
7323	 * that a later registration does not access the buffer that is
7324	 * about to be freed.
7325	 */
7326	if (!default_bootup_tracer)
7327		return 0;
7328
7329	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
7330	       default_bootup_tracer);
7331	default_bootup_tracer = NULL;
7332
7333	return 0;
7334}
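/*
 * Context sketch (illustrative): default_bootup_tracer is normally set
 * from the "ftrace=" kernel command line option, e.g.:
 *
 *   linux ... ftrace=function
 *
 * If that tracer never registered itself by late_initcall time, the
 * message above is printed and the pointer is cleared so nothing later
 * touches the soon-to-be-freed init data.
 */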
7335
7336fs_initcall(tracer_init_tracefs);
7337late_initcall(clear_boot_tracer);
7338