// SPDX-License-Identifier: GPL-2.0
2/*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15#include <linux/ring_buffer.h>
16#include <generated/utsrelease.h>
17#include <linux/stacktrace.h>
18#include <linux/writeback.h>
19#include <linux/kallsyms.h>
20#include <linux/security.h>
21#include <linux/seq_file.h>
22#include <linux/notifier.h>
23#include <linux/irqflags.h>
24#include <linux/debugfs.h>
25#include <linux/tracefs.h>
26#include <linux/pagemap.h>
27#include <linux/hardirq.h>
28#include <linux/linkage.h>
29#include <linux/uaccess.h>
30#include <linux/vmalloc.h>
31#include <linux/ftrace.h>
32#include <linux/module.h>
33#include <linux/percpu.h>
34#include <linux/splice.h>
35#include <linux/kdebug.h>
36#include <linux/string.h>
37#include <linux/mount.h>
38#include <linux/rwsem.h>
39#include <linux/slab.h>
40#include <linux/ctype.h>
41#include <linux/init.h>
42#include <linux/kmemleak.h>
43#include <linux/poll.h>
44#include <linux/nmi.h>
45#include <linux/fs.h>
46#include <linux/trace.h>
47#include <linux/sched/clock.h>
48#include <linux/sched/rt.h>
49
50#include "trace.h"
51#include "trace_output.h"
52
53/*
54 * On boot up, the ring buffer is set to the minimum size, so that
55 * we do not waste memory on systems that are not using tracing.
56 */
57bool ring_buffer_expanded;
58
/*
 * We need to change this state when a selftest is running.
 * A selftest will look into the ring-buffer to count the
 * entries inserted during the selftest, although some concurrent
 * insertions into the ring-buffer, such as trace_printk, could occur
 * at the same time, giving false positive or negative results.
 */
66static bool __read_mostly tracing_selftest_running;
67
68/*
69 * If a tracer is running, we do not want to run SELFTEST.
70 */
71bool __read_mostly tracing_selftest_disabled;
72
73/* Pipe tracepoints to printk */
74struct trace_iterator *tracepoint_print_iter;
75int tracepoint_printk;
76static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
77
78/* For tracers that don't implement custom flags */
79static struct tracer_opt dummy_tracer_opt[] = {
80 { }
81};
82
83static int
84dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
85{
86 return 0;
87}
88
89/*
90 * To prevent the comm cache from being overwritten when no
91 * tracing is active, only save the comm when a trace event
92 * occurred.
93 */
94static DEFINE_PER_CPU(bool, trace_taskinfo_save);
95
96/*
97 * Kill all tracing for good (never come back).
98 * It is initialized to 1 but will turn to zero if the initialization
99 * of the tracer is successful. But that is the only place that sets
100 * this back to zero.
101 */
102static int tracing_disabled = 1;
103
104cpumask_var_t __read_mostly tracing_buffer_mask;
105
/*
 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 *
 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
 * is set, then ftrace_dump is called. This will output the contents
 * of the ftrace buffers to the console. This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
 *
 * It is off by default, but you can enable it either by specifying
 * "ftrace_dump_on_oops" on the kernel command line or by setting
 * /proc/sys/kernel/ftrace_dump_on_oops.
 * Set it to 1 to dump the buffers of all CPUs.
 * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
 */
121
122enum ftrace_dump_mode ftrace_dump_on_oops;
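
/*
 * Usage sketch, derived from the comment above and the __setup() handler
 * below (paths are the usual ones, adjust for your system):
 *
 *   # dump every CPU's buffer on an oops (boot parameter)
 *   ftrace_dump_on_oops
 *
 *   # dump only the buffer of the CPU that triggered the oops
 *   ftrace_dump_on_oops=orig_cpu
 *
 *   # or flip it at run time via sysctl
 *   echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */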
123
124/* When set, tracing will stop when a WARN*() is hit */
125int __disable_trace_on_warning;
126
127#ifdef CONFIG_TRACE_EVAL_MAP_FILE
128/* Map of enums to their values, for "eval_map" file */
129struct trace_eval_map_head {
130 struct module *mod;
131 unsigned long length;
132};
133
134union trace_eval_map_item;
135
136struct trace_eval_map_tail {
137 /*
138 * "end" is first and points to NULL as it must be different
139 * than "mod" or "eval_string"
140 */
141 union trace_eval_map_item *next;
142 const char *end; /* points to NULL */
143};
144
145static DEFINE_MUTEX(trace_eval_mutex);
146
147/*
148 * The trace_eval_maps are saved in an array with two extra elements,
149 * one at the beginning, and one at the end. The beginning item contains
150 * the count of the saved maps (head.length), and the module they
151 * belong to if not built in (head.mod). The ending item contains a
152 * pointer to the next array of saved eval_map items.
153 */
154union trace_eval_map_item {
155 struct trace_eval_map map;
156 struct trace_eval_map_head head;
157 struct trace_eval_map_tail tail;
158};
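
/*
 * Illustrative layout of one saved array (assuming a module that registered
 * three eval maps), matching the description above:
 *
 *   item[0].head : .mod = <owning module or NULL>, .length = 3
 *   item[1..3]   : struct trace_eval_map entries
 *   item[4].tail : .next = <next saved array, or NULL if this is the last>
 */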
159
160static union trace_eval_map_item *trace_eval_maps;
161#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
162
163static int tracing_set_tracer(struct trace_array *tr, const char *buf);
164static void ftrace_trace_userstack(struct trace_array *tr,
165 struct ring_buffer *buffer,
166 unsigned long flags, int pc);
167
168#define MAX_TRACER_SIZE 100
169static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
170static char *default_bootup_tracer;
171
172static bool allocate_snapshot;
173
174static int __init set_cmdline_ftrace(char *str)
175{
176 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
177 default_bootup_tracer = bootup_tracer_buf;
178 /* We are using ftrace early, expand it */
179 ring_buffer_expanded = true;
180 return 1;
181}
182__setup("ftrace=", set_cmdline_ftrace);
183
184static int __init set_ftrace_dump_on_oops(char *str)
185{
186 if (*str++ != '=' || !*str) {
187 ftrace_dump_on_oops = DUMP_ALL;
188 return 1;
189 }
190
191 if (!strcmp("orig_cpu", str)) {
192 ftrace_dump_on_oops = DUMP_ORIG;
193 return 1;
194 }
195
196 return 0;
197}
198__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
199
200static int __init stop_trace_on_warning(char *str)
201{
202 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
203 __disable_trace_on_warning = 1;
204 return 1;
205}
206__setup("traceoff_on_warning", stop_trace_on_warning);
207
208static int __init boot_alloc_snapshot(char *str)
209{
210 allocate_snapshot = true;
211 /* We also need the main ring buffer expanded */
212 ring_buffer_expanded = true;
213 return 1;
214}
215__setup("alloc_snapshot", boot_alloc_snapshot);
216
217
218static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
219
220static int __init set_trace_boot_options(char *str)
221{
222 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
223 return 1;
224}
225__setup("trace_options=", set_trace_boot_options);
226
227static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
228static char *trace_boot_clock __initdata;
229
230static int __init set_trace_boot_clock(char *str)
231{
232 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
233 trace_boot_clock = trace_boot_clock_buf;
234 return 1;
235}
236__setup("trace_clock=", set_trace_boot_clock);
237
238static int __init set_tracepoint_printk(char *str)
239{
240 /* Ignore the "tp_printk_stop_on_boot" param */
241 if (*str == '_')
242 return 0;
243
244 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
245 tracepoint_printk = 1;
246 return 1;
247}
248__setup("tp_printk", set_tracepoint_printk);
249
250unsigned long long ns2usecs(u64 nsec)
251{
252 nsec += 500;
253 do_div(nsec, 1000);
254 return nsec;
255}
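
/*
 * Example: ns2usecs() rounds to the nearest microsecond, so 1499 ns
 * becomes (1499 + 500) / 1000 = 1 us, while 1500 ns becomes 2 us.
 */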
256
257/* trace_flags holds trace_options default values */
258#define TRACE_DEFAULT_FLAGS \
259 (FUNCTION_DEFAULT_FLAGS | \
260 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
261 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
262 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
263 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
264
265/* trace_options that are only supported by global_trace */
266#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
267 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
268
269/* trace_flags that are default zero for instances */
270#define ZEROED_TRACE_FLAGS \
271 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
272
273/*
274 * The global_trace is the descriptor that holds the top-level tracing
275 * buffers for the live tracing.
276 */
277static struct trace_array global_trace = {
278 .trace_flags = TRACE_DEFAULT_FLAGS,
279};
280
281LIST_HEAD(ftrace_trace_arrays);
282
283int trace_array_get(struct trace_array *this_tr)
284{
285 struct trace_array *tr;
286 int ret = -ENODEV;
287
288 mutex_lock(&trace_types_lock);
289 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
290 if (tr == this_tr) {
291 tr->ref++;
292 ret = 0;
293 break;
294 }
295 }
296 mutex_unlock(&trace_types_lock);
297
298 return ret;
299}
300
301static void __trace_array_put(struct trace_array *this_tr)
302{
303 WARN_ON(!this_tr->ref);
304 this_tr->ref--;
305}
306
307void trace_array_put(struct trace_array *this_tr)
308{
309 mutex_lock(&trace_types_lock);
310 __trace_array_put(this_tr);
311 mutex_unlock(&trace_types_lock);
312}
313
314int tracing_check_open_get_tr(struct trace_array *tr)
315{
316 int ret;
317
318 ret = security_locked_down(LOCKDOWN_TRACEFS);
319 if (ret)
320 return ret;
321
322 if (tracing_disabled)
323 return -ENODEV;
324
325 if (tr && trace_array_get(tr) < 0)
326 return -ENODEV;
327
328 return 0;
329}
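
/*
 * Minimal sketch (not part of the build) of how a tracefs open() handler
 * would pair tracing_check_open_get_tr() with trace_array_put(); the
 * example_* name is hypothetical.
 */
#if 0
static int example_open(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	int ret;

	ret = tracing_check_open_get_tr(tr);
	if (ret)
		return ret;

	filp->private_data = tr;	/* reference is held until release */

	/* if later setup failed, we would drop it: trace_array_put(tr); */
	return 0;
}
#endif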
330
331int call_filter_check_discard(struct trace_event_call *call, void *rec,
332 struct ring_buffer *buffer,
333 struct ring_buffer_event *event)
334{
335 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
336 !filter_match_preds(call->filter, rec)) {
337 __trace_event_discard_commit(buffer, event);
338 return 1;
339 }
340
341 return 0;
342}
343
344void trace_free_pid_list(struct trace_pid_list *pid_list)
345{
346 vfree(pid_list->pids);
347 kfree(pid_list);
348}
349
/**
 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
 * @filtered_pids: The list of pids to check
 * @search_pid: The PID to find in @filtered_pids
 *
 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
 */
357bool
358trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
359{
360 /*
361 * If pid_max changed after filtered_pids was created, we
362 * by default ignore all pids greater than the previous pid_max.
363 */
364 if (search_pid >= filtered_pids->pid_max)
365 return false;
366
367 return test_bit(search_pid, filtered_pids->pids);
368}
369
370/**
371 * trace_ignore_this_task - should a task be ignored for tracing
372 * @filtered_pids: The list of pids to check
373 * @task: The task that should be ignored if not filtered
374 *
375 * Checks if @task should be traced or not from @filtered_pids.
376 * Returns true if @task should *NOT* be traced.
377 * Returns false if @task should be traced.
378 */
379bool
380trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
381{
382 /*
383 * Return false, because if filtered_pids does not exist,
384 * all pids are good to trace.
385 */
386 if (!filtered_pids)
387 return false;
388
389 return !trace_find_filtered_pid(filtered_pids, task->pid);
390}
391
392/**
393 * trace_filter_add_remove_task - Add or remove a task from a pid_list
394 * @pid_list: The list to modify
395 * @self: The current task for fork or NULL for exit
396 * @task: The task to add or remove
397 *
398 * If adding a task, if @self is defined, the task is only added if @self
399 * is also included in @pid_list. This happens on fork and tasks should
400 * only be added when the parent is listed. If @self is NULL, then the
401 * @task pid will be removed from the list, which would happen on exit
402 * of a task.
403 */
404void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
405 struct task_struct *self,
406 struct task_struct *task)
407{
408 if (!pid_list)
409 return;
410
411 /* For forks, we only add if the forking task is listed */
412 if (self) {
413 if (!trace_find_filtered_pid(pid_list, self->pid))
414 return;
415 }
416
417 /* Sorry, but we don't support pid_max changing after setting */
418 if (task->pid >= pid_list->pid_max)
419 return;
420
421 /* "self" is set for forks, and NULL for exits */
422 if (self)
423 set_bit(task->pid, pid_list->pids);
424 else
425 clear_bit(task->pid, pid_list->pids);
426}
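
/*
 * Sketch (not compiled) of how fork/exit hooks would keep a pid list in
 * sync using the helper above: on fork pass the parent as @self so the
 * child is only added when the parent is filtered, on exit pass NULL so
 * the task is removed. The example_* names are hypothetical.
 */
#if 0
static void example_sched_process_fork(void *data,
				       struct task_struct *self,
				       struct task_struct *task)
{
	struct trace_pid_list *pid_list = data;

	trace_filter_add_remove_task(pid_list, self, task);
}

static void example_sched_process_exit(void *data, struct task_struct *task)
{
	struct trace_pid_list *pid_list = data;

	trace_filter_add_remove_task(pid_list, NULL, task);
}
#endif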
427
428/**
429 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
430 * @pid_list: The pid list to show
431 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
432 * @pos: The position of the file
433 *
434 * This is used by the seq_file "next" operation to iterate the pids
435 * listed in a trace_pid_list structure.
436 *
437 * Returns the pid+1 as we want to display pid of zero, but NULL would
438 * stop the iteration.
439 */
440void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
441{
442 unsigned long pid = (unsigned long)v;
443
444 (*pos)++;
445
	/* pid is already +1 of the actual previous bit */
447 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
448
449 /* Return pid + 1 to allow zero to be represented */
450 if (pid < pid_list->pid_max)
451 return (void *)(pid + 1);
452
453 return NULL;
454}
455
456/**
457 * trace_pid_start - Used for seq_file to start reading pid lists
458 * @pid_list: The pid list to show
459 * @pos: The position of the file
460 *
461 * This is used by seq_file "start" operation to start the iteration
462 * of listing pids.
463 *
464 * Returns the pid+1 as we want to display pid of zero, but NULL would
465 * stop the iteration.
466 */
467void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
468{
469 unsigned long pid;
470 loff_t l = 0;
471
472 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
473 if (pid >= pid_list->pid_max)
474 return NULL;
475
476 /* Return pid + 1 so that zero can be the exit value */
477 for (pid++; pid && l < *pos;
478 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
479 ;
480 return (void *)pid;
481}
482
483/**
484 * trace_pid_show - show the current pid in seq_file processing
485 * @m: The seq_file structure to write into
486 * @v: A void pointer of the pid (+1) value to display
487 *
488 * Can be directly used by seq_file operations to display the current
489 * pid value.
490 */
491int trace_pid_show(struct seq_file *m, void *v)
492{
493 unsigned long pid = (unsigned long)v - 1;
494
495 seq_printf(m, "%lu\n", pid);
496 return 0;
497}
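
/*
 * Sketch (not compiled) of wiring the three helpers above into a seq_file,
 * which is how a pid-filter style tracefs file would iterate the list.
 * The example_* names are hypothetical; the pid list would normally come
 * from the file's private data under the caller's locking.
 */
#if 0
static void *example_pid_start(struct seq_file *m, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_start(pid_list, pos);
}

static void *example_pid_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;

	return trace_pid_next(pid_list, v, pos);
}

static void example_pid_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pid_seq_ops = {
	.start	= example_pid_start,
	.next	= example_pid_next,
	.stop	= example_pid_stop,
	.show	= trace_pid_show,
};
#endif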
498
499/* 128 should be much more than enough */
500#define PID_BUF_SIZE 127
501
502int trace_pid_write(struct trace_pid_list *filtered_pids,
503 struct trace_pid_list **new_pid_list,
504 const char __user *ubuf, size_t cnt)
505{
506 struct trace_pid_list *pid_list;
507 struct trace_parser parser;
508 unsigned long val;
509 int nr_pids = 0;
510 ssize_t read = 0;
511 ssize_t ret = 0;
512 loff_t pos;
513 pid_t pid;
514
515 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
516 return -ENOMEM;
517
518 /*
519 * Always recreate a new array. The write is an all or nothing
520 * operation. Always create a new array when adding new pids by
521 * the user. If the operation fails, then the current list is
522 * not modified.
523 */
524 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
525 if (!pid_list) {
526 trace_parser_put(&parser);
527 return -ENOMEM;
528 }
529
530 pid_list->pid_max = READ_ONCE(pid_max);
531
532 /* Only truncating will shrink pid_max */
533 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
534 pid_list->pid_max = filtered_pids->pid_max;
535
536 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
537 if (!pid_list->pids) {
538 trace_parser_put(&parser);
539 kfree(pid_list);
540 return -ENOMEM;
541 }
542
543 if (filtered_pids) {
544 /* copy the current bits to the new max */
545 for_each_set_bit(pid, filtered_pids->pids,
546 filtered_pids->pid_max) {
547 set_bit(pid, pid_list->pids);
548 nr_pids++;
549 }
550 }
551
552 while (cnt > 0) {
553
554 pos = 0;
555
556 ret = trace_get_user(&parser, ubuf, cnt, &pos);
557 if (ret < 0 || !trace_parser_loaded(&parser))
558 break;
559
560 read += ret;
561 ubuf += ret;
562 cnt -= ret;
563
564 ret = -EINVAL;
565 if (kstrtoul(parser.buffer, 0, &val))
566 break;
567 if (val >= pid_list->pid_max)
568 break;
569
570 pid = (pid_t)val;
571
572 set_bit(pid, pid_list->pids);
573 nr_pids++;
574
575 trace_parser_clear(&parser);
576 ret = 0;
577 }
578 trace_parser_put(&parser);
579
580 if (ret < 0) {
581 trace_free_pid_list(pid_list);
582 return ret;
583 }
584
585 if (!nr_pids) {
586 /* Cleared the list of pids */
587 trace_free_pid_list(pid_list);
588 read = ret;
589 pid_list = NULL;
590 }
591
592 *new_pid_list = pid_list;
593
594 return read;
595}
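
/*
 * From userspace this is typically reached by writing a space-separated
 * list of pids to one of the pid filter files, e.g. (path assumes the
 * usual tracefs mount point):
 *
 *   echo "123 456" > /sys/kernel/tracing/set_event_pid
 *
 * An empty write clears the list, which is why a parse that yields no
 * pids frees the new list and returns with *new_pid_list set to NULL.
 */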
596
597static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
598{
599 u64 ts;
600
601 /* Early boot up does not have a buffer yet */
602 if (!buf->buffer)
603 return trace_clock_local();
604
605 ts = ring_buffer_time_stamp(buf->buffer, cpu);
606 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
607
608 return ts;
609}
610
611u64 ftrace_now(int cpu)
612{
613 return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
614}
615
616/**
617 * tracing_is_enabled - Show if global_trace has been disabled
618 *
619 * Shows if the global trace has been enabled or not. It uses the
620 * mirror flag "buffer_disabled" to be used in fast paths such as for
621 * the irqsoff tracer. But it may be inaccurate due to races. If you
622 * need to know the accurate state, use tracing_is_on() which is a little
623 * slower, but accurate.
624 */
625int tracing_is_enabled(void)
626{
627 /*
628 * For quick access (irqsoff uses this in fast path), just
629 * return the mirror variable of the state of the ring buffer.
630 * It's a little racy, but we don't really care.
631 */
632 smp_rmb();
633 return !global_trace.buffer_disabled;
634}
635
/*
 * trace_buf_size is the size in bytes that is allocated
 * for a buffer. Note, the number of bytes is always rounded
 * to page size.
 *
 * This number is purposely set to a low number of 16384.
 * If the dump on oops happens, it will be much appreciated
 * not to have to wait for all that output. In any case, this is
 * configurable at both boot time and run time.
 */
646#define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
647
648static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
649
650/* trace_types holds a link list of available tracers. */
651static struct tracer *trace_types __read_mostly;
652
653/*
654 * trace_types_lock is used to protect the trace_types list.
655 */
656DEFINE_MUTEX(trace_types_lock);
657
/*
 * serialize the access of the ring buffer
 *
 * ring buffer serializes readers, but it is low level protection.
 * The validity of the events (which are returned by ring_buffer_peek() etc.)
 * is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow other processes to
 * consume these events concurrently:
 *   A) the page of the consumed events may become a normal page
 *      (not a reader page) in the ring buffer, and this page will be
 *      rewritten by the events producer.
 *   B) the page of the consumed events may become a page for splice_read,
 *      and this page will be returned to the system.
 *
 * These primitives allow multiple processes to access different cpu ring
 * buffers concurrently.
 *
 * These primitives don't distinguish read-only and read-consume access.
 * Multiple read-only accesses are also serialized.
 */
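
/*
 * Concretely (following the comment above): a reader of a single cpu
 * buffer takes all_cpu_access_lock for read plus that cpu's
 * cpu_access_lock mutex, so readers of different cpus can run in
 * parallel, while a reader of all cpus takes all_cpu_access_lock for
 * write and therefore excludes every other reader.
 */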
679
680#ifdef CONFIG_SMP
681static DECLARE_RWSEM(all_cpu_access_lock);
682static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
683
684static inline void trace_access_lock(int cpu)
685{
686 if (cpu == RING_BUFFER_ALL_CPUS) {
687 /* gain it for accessing the whole ring buffer. */
688 down_write(&all_cpu_access_lock);
689 } else {
690 /* gain it for accessing a cpu ring buffer. */
691
692 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
693 down_read(&all_cpu_access_lock);
694
695 /* Secondly block other access to this @cpu ring buffer. */
696 mutex_lock(&per_cpu(cpu_access_lock, cpu));
697 }
698}
699
700static inline void trace_access_unlock(int cpu)
701{
702 if (cpu == RING_BUFFER_ALL_CPUS) {
703 up_write(&all_cpu_access_lock);
704 } else {
705 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
706 up_read(&all_cpu_access_lock);
707 }
708}
709
710static inline void trace_access_lock_init(void)
711{
712 int cpu;
713
714 for_each_possible_cpu(cpu)
715 mutex_init(&per_cpu(cpu_access_lock, cpu));
716}
717
718#else
719
720static DEFINE_MUTEX(access_lock);
721
722static inline void trace_access_lock(int cpu)
723{
724 (void)cpu;
725 mutex_lock(&access_lock);
726}
727
728static inline void trace_access_unlock(int cpu)
729{
730 (void)cpu;
731 mutex_unlock(&access_lock);
732}
733
734static inline void trace_access_lock_init(void)
735{
736}
737
738#endif
739
740#ifdef CONFIG_STACKTRACE
741static void __ftrace_trace_stack(struct ring_buffer *buffer,
742 unsigned long flags,
743 int skip, int pc, struct pt_regs *regs);
744static inline void ftrace_trace_stack(struct trace_array *tr,
745 struct ring_buffer *buffer,
746 unsigned long flags,
747 int skip, int pc, struct pt_regs *regs);
748
749#else
750static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
751 unsigned long flags,
752 int skip, int pc, struct pt_regs *regs)
753{
754}
755static inline void ftrace_trace_stack(struct trace_array *tr,
756 struct ring_buffer *buffer,
757 unsigned long flags,
758 int skip, int pc, struct pt_regs *regs)
759{
760}
761
762#endif
763
764static __always_inline void
765trace_event_setup(struct ring_buffer_event *event,
766 int type, unsigned long flags, int pc)
767{
768 struct trace_entry *ent = ring_buffer_event_data(event);
769
770 tracing_generic_entry_update(ent, type, flags, pc);
771}
772
773static __always_inline struct ring_buffer_event *
774__trace_buffer_lock_reserve(struct ring_buffer *buffer,
775 int type,
776 unsigned long len,
777 unsigned long flags, int pc)
778{
779 struct ring_buffer_event *event;
780
781 event = ring_buffer_lock_reserve(buffer, len);
782 if (event != NULL)
783 trace_event_setup(event, type, flags, pc);
784
785 return event;
786}
787
788void tracer_tracing_on(struct trace_array *tr)
789{
790 if (tr->trace_buffer.buffer)
791 ring_buffer_record_on(tr->trace_buffer.buffer);
792 /*
793 * This flag is looked at when buffers haven't been allocated
794 * yet, or by some tracers (like irqsoff), that just want to
795 * know if the ring buffer has been disabled, but it can handle
796 * races of where it gets disabled but we still do a record.
797 * As the check is in the fast path of the tracers, it is more
798 * important to be fast than accurate.
799 */
800 tr->buffer_disabled = 0;
801 /* Make the flag seen by readers */
802 smp_wmb();
803}
804
805/**
806 * tracing_on - enable tracing buffers
807 *
808 * This function enables tracing buffers that may have been
809 * disabled with tracing_off.
810 */
811void tracing_on(void)
812{
813 tracer_tracing_on(&global_trace);
814}
815EXPORT_SYMBOL_GPL(tracing_on);
816
817
818static __always_inline void
819__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
820{
821 __this_cpu_write(trace_taskinfo_save, true);
822
823 /* If this is the temp buffer, we need to commit fully */
824 if (this_cpu_read(trace_buffered_event) == event) {
825 /* Length is in event->array[0] */
826 ring_buffer_write(buffer, event->array[0], &event->array[1]);
827 /* Release the temp buffer */
828 this_cpu_dec(trace_buffered_event_cnt);
829 } else
830 ring_buffer_unlock_commit(buffer, event);
831}
832
833/**
834 * __trace_puts - write a constant string into the trace buffer.
835 * @ip: The address of the caller
836 * @str: The constant string to write
837 * @size: The size of the string.
838 */
839int __trace_puts(unsigned long ip, const char *str, int size)
840{
841 struct ring_buffer_event *event;
842 struct ring_buffer *buffer;
843 struct print_entry *entry;
844 unsigned long irq_flags;
845 int alloc;
846 int pc;
847
848 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
849 return 0;
850
851 pc = preempt_count();
852
853 if (unlikely(tracing_selftest_running || tracing_disabled))
854 return 0;
855
856 alloc = sizeof(*entry) + size + 2; /* possible \n added */
857
858 local_save_flags(irq_flags);
859 buffer = global_trace.trace_buffer.buffer;
860 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
861 irq_flags, pc);
862 if (!event)
863 return 0;
864
865 entry = ring_buffer_event_data(event);
866 entry->ip = ip;
867
868 memcpy(&entry->buf, str, size);
869
870 /* Add a newline if necessary */
871 if (entry->buf[size - 1] != '\n') {
872 entry->buf[size] = '\n';
873 entry->buf[size + 1] = '\0';
874 } else
875 entry->buf[size] = '\0';
876
877 __buffer_unlock_commit(buffer, event);
878 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
879
880 return size;
881}
882EXPORT_SYMBOL_GPL(__trace_puts);
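
/*
 * Minimal sketch (not compiled) of calling __trace_puts() directly; in
 * practice it is normally reached through the trace_puts() helper macro,
 * which supplies _THIS_IP_ and the string length. The example_* name is
 * hypothetical.
 */
#if 0
static void example_mark_point(void)
{
	__trace_puts(_THIS_IP_, "hit the slow path\n", 18);
}
#endif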
883
884/**
885 * __trace_bputs - write the pointer to a constant string into trace buffer
886 * @ip: The address of the caller
887 * @str: The constant string to write to the buffer to
888 */
889int __trace_bputs(unsigned long ip, const char *str)
890{
891 struct ring_buffer_event *event;
892 struct ring_buffer *buffer;
893 struct bputs_entry *entry;
894 unsigned long irq_flags;
895 int size = sizeof(struct bputs_entry);
896 int pc;
897
898 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
899 return 0;
900
901 pc = preempt_count();
902
903 if (unlikely(tracing_selftest_running || tracing_disabled))
904 return 0;
905
906 local_save_flags(irq_flags);
907 buffer = global_trace.trace_buffer.buffer;
908 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
909 irq_flags, pc);
910 if (!event)
911 return 0;
912
913 entry = ring_buffer_event_data(event);
914 entry->ip = ip;
915 entry->str = str;
916
917 __buffer_unlock_commit(buffer, event);
918 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
919
920 return 1;
921}
922EXPORT_SYMBOL_GPL(__trace_bputs);
923
924#ifdef CONFIG_TRACER_SNAPSHOT
925void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
926{
927 struct tracer *tracer = tr->current_trace;
928 unsigned long flags;
929
930 if (in_nmi()) {
931 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
932 internal_trace_puts("*** snapshot is being ignored ***\n");
933 return;
934 }
935
936 if (!tr->allocated_snapshot) {
937 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
938 internal_trace_puts("*** stopping trace here! ***\n");
939 tracing_off();
940 return;
941 }
942
943 /* Note, snapshot can not be used when the tracer uses it */
944 if (tracer->use_max_tr) {
945 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
946 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
947 return;
948 }
949
950 local_irq_save(flags);
951 update_max_tr(tr, current, smp_processor_id(), cond_data);
952 local_irq_restore(flags);
953}
954
955void tracing_snapshot_instance(struct trace_array *tr)
956{
957 tracing_snapshot_instance_cond(tr, NULL);
958}
959
960/**
961 * tracing_snapshot - take a snapshot of the current buffer.
962 *
963 * This causes a swap between the snapshot buffer and the current live
964 * tracing buffer. You can use this to take snapshots of the live
965 * trace when some condition is triggered, but continue to trace.
966 *
967 * Note, make sure to allocate the snapshot with either
968 * a tracing_snapshot_alloc(), or by doing it manually
969 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
970 *
971 * If the snapshot buffer is not allocated, it will stop tracing.
972 * Basically making a permanent snapshot.
973 */
974void tracing_snapshot(void)
975{
976 struct trace_array *tr = &global_trace;
977
978 tracing_snapshot_instance(tr);
979}
980EXPORT_SYMBOL_GPL(tracing_snapshot);
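
/*
 * Example from the comment above: the same swap can be triggered from
 * userspace once the spare buffer exists, e.g.
 *
 *   echo 1 > /sys/kernel/debug/tracing/snapshot
 *
 * while kernel code that knows the buffer is allocated can simply call
 * tracing_snapshot() at the interesting point.
 */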
981
982/**
983 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
984 * @tr: The tracing instance to snapshot
985 * @cond_data: The data to be tested conditionally, and possibly saved
986 *
987 * This is the same as tracing_snapshot() except that the snapshot is
988 * conditional - the snapshot will only happen if the
989 * cond_snapshot.update() implementation receiving the cond_data
990 * returns true, which means that the trace array's cond_snapshot
991 * update() operation used the cond_data to determine whether the
992 * snapshot should be taken, and if it was, presumably saved it along
993 * with the snapshot.
994 */
995void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
996{
997 tracing_snapshot_instance_cond(tr, cond_data);
998}
999EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1000
1001/**
1002 * tracing_snapshot_cond_data - get the user data associated with a snapshot
1003 * @tr: The tracing instance
1004 *
1005 * When the user enables a conditional snapshot using
1006 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1007 * with the snapshot. This accessor is used to retrieve it.
1008 *
1009 * Should not be called from cond_snapshot.update(), since it takes
1010 * the tr->max_lock lock, which the code calling
1011 * cond_snapshot.update() has already done.
1012 *
1013 * Returns the cond_data associated with the trace array's snapshot.
1014 */
1015void *tracing_cond_snapshot_data(struct trace_array *tr)
1016{
1017 void *cond_data = NULL;
1018
1019 local_irq_disable();
1020 arch_spin_lock(&tr->max_lock);
1021
1022 if (tr->cond_snapshot)
1023 cond_data = tr->cond_snapshot->cond_data;
1024
1025 arch_spin_unlock(&tr->max_lock);
1026 local_irq_enable();
1027
1028 return cond_data;
1029}
1030EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1031
1032static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1033 struct trace_buffer *size_buf, int cpu_id);
1034static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1035
1036int tracing_alloc_snapshot_instance(struct trace_array *tr)
1037{
1038 int ret;
1039
1040 if (!tr->allocated_snapshot) {
1041
1042 /* allocate spare buffer */
1043 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1044 &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1045 if (ret < 0)
1046 return ret;
1047
1048 tr->allocated_snapshot = true;
1049 }
1050
1051 return 0;
1052}
1053
1054static void free_snapshot(struct trace_array *tr)
1055{
	/*
	 * We don't free the ring buffer; instead, we resize it because
	 * the max_tr ring buffer has some state (e.g. ring->clock) and
	 * we want to preserve it.
	 */
1061 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1062 set_buffer_entries(&tr->max_buffer, 1);
1063 tracing_reset_online_cpus(&tr->max_buffer);
1064 tr->allocated_snapshot = false;
1065}
1066
1067/**
1068 * tracing_alloc_snapshot - allocate snapshot buffer.
1069 *
1070 * This only allocates the snapshot buffer if it isn't already
1071 * allocated - it doesn't also take a snapshot.
1072 *
1073 * This is meant to be used in cases where the snapshot buffer needs
1074 * to be set up for events that can't sleep but need to be able to
1075 * trigger a snapshot.
1076 */
1077int tracing_alloc_snapshot(void)
1078{
1079 struct trace_array *tr = &global_trace;
1080 int ret;
1081
1082 ret = tracing_alloc_snapshot_instance(tr);
1083 WARN_ON(ret < 0);
1084
1085 return ret;
1086}
1087EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1088
1089/**
1090 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1091 *
1092 * This is similar to tracing_snapshot(), but it will allocate the
1093 * snapshot buffer if it isn't already allocated. Use this only
1094 * where it is safe to sleep, as the allocation may sleep.
1095 *
1096 * This causes a swap between the snapshot buffer and the current live
1097 * tracing buffer. You can use this to take snapshots of the live
1098 * trace when some condition is triggered, but continue to trace.
1099 */
1100void tracing_snapshot_alloc(void)
1101{
1102 int ret;
1103
1104 ret = tracing_alloc_snapshot();
1105 if (ret < 0)
1106 return;
1107
1108 tracing_snapshot();
1109}
1110EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1111
1112/**
1113 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1114 * @tr: The tracing instance
1115 * @cond_data: User data to associate with the snapshot
1116 * @update: Implementation of the cond_snapshot update function
1117 *
1118 * Check whether the conditional snapshot for the given instance has
1119 * already been enabled, or if the current tracer is already using a
1120 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1121 * save the cond_data and update function inside.
1122 *
1123 * Returns 0 if successful, error otherwise.
1124 */
1125int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1126 cond_update_fn_t update)
1127{
1128 struct cond_snapshot *cond_snapshot;
1129 int ret = 0;
1130
1131 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1132 if (!cond_snapshot)
1133 return -ENOMEM;
1134
1135 cond_snapshot->cond_data = cond_data;
1136 cond_snapshot->update = update;
1137
1138 mutex_lock(&trace_types_lock);
1139
1140 ret = tracing_alloc_snapshot_instance(tr);
1141 if (ret)
1142 goto fail_unlock;
1143
1144 if (tr->current_trace->use_max_tr) {
1145 ret = -EBUSY;
1146 goto fail_unlock;
1147 }
1148
1149 /*
1150 * The cond_snapshot can only change to NULL without the
1151 * trace_types_lock. We don't care if we race with it going
1152 * to NULL, but we want to make sure that it's not set to
1153 * something other than NULL when we get here, which we can
1154 * do safely with only holding the trace_types_lock and not
1155 * having to take the max_lock.
1156 */
1157 if (tr->cond_snapshot) {
1158 ret = -EBUSY;
1159 goto fail_unlock;
1160 }
1161
1162 local_irq_disable();
1163 arch_spin_lock(&tr->max_lock);
1164 tr->cond_snapshot = cond_snapshot;
1165 arch_spin_unlock(&tr->max_lock);
1166 local_irq_enable();
1167
1168 mutex_unlock(&trace_types_lock);
1169
1170 return ret;
1171
1172 fail_unlock:
1173 mutex_unlock(&trace_types_lock);
1174 kfree(cond_snapshot);
1175 return ret;
1176}
1177EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1178
1179/**
1180 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1181 * @tr: The tracing instance
1182 *
1183 * Check whether the conditional snapshot for the given instance is
1184 * enabled; if so, free the cond_snapshot associated with it,
1185 * otherwise return -EINVAL.
1186 *
1187 * Returns 0 if successful, error otherwise.
1188 */
1189int tracing_snapshot_cond_disable(struct trace_array *tr)
1190{
1191 int ret = 0;
1192
1193 local_irq_disable();
1194 arch_spin_lock(&tr->max_lock);
1195
1196 if (!tr->cond_snapshot)
1197 ret = -EINVAL;
1198 else {
1199 kfree(tr->cond_snapshot);
1200 tr->cond_snapshot = NULL;
1201 }
1202
1203 arch_spin_unlock(&tr->max_lock);
1204 local_irq_enable();
1205
1206 return ret;
1207}
1208EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1209#else
1210void tracing_snapshot(void)
1211{
1212 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1213}
1214EXPORT_SYMBOL_GPL(tracing_snapshot);
1215void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1216{
1217 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1218}
1219EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1220int tracing_alloc_snapshot(void)
1221{
1222 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1223 return -ENODEV;
1224}
1225EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1226void tracing_snapshot_alloc(void)
1227{
1228 /* Give warning */
1229 tracing_snapshot();
1230}
1231EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1232void *tracing_cond_snapshot_data(struct trace_array *tr)
1233{
1234 return NULL;
1235}
1236EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1237int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1238{
1239 return -ENODEV;
1240}
1241EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1242int tracing_snapshot_cond_disable(struct trace_array *tr)
1243{
1244 return false;
1245}
1246EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1247#endif /* CONFIG_TRACER_SNAPSHOT */
1248
1249void tracer_tracing_off(struct trace_array *tr)
1250{
1251 if (tr->trace_buffer.buffer)
1252 ring_buffer_record_off(tr->trace_buffer.buffer);
1253 /*
1254 * This flag is looked at when buffers haven't been allocated
1255 * yet, or by some tracers (like irqsoff), that just want to
1256 * know if the ring buffer has been disabled, but it can handle
1257 * races of where it gets disabled but we still do a record.
1258 * As the check is in the fast path of the tracers, it is more
1259 * important to be fast than accurate.
1260 */
1261 tr->buffer_disabled = 1;
1262 /* Make the flag seen by readers */
1263 smp_wmb();
1264}
1265
1266/**
1267 * tracing_off - turn off tracing buffers
1268 *
1269 * This function stops the tracing buffers from recording data.
1270 * It does not disable any overhead the tracers themselves may
1271 * be causing. This function simply causes all recording to
1272 * the ring buffers to fail.
1273 */
1274void tracing_off(void)
1275{
1276 tracer_tracing_off(&global_trace);
1277}
1278EXPORT_SYMBOL_GPL(tracing_off);
1279
1280void disable_trace_on_warning(void)
1281{
1282 if (__disable_trace_on_warning)
1283 tracing_off();
1284}
1285
1286/**
1287 * tracer_tracing_is_on - show real state of ring buffer enabled
1288 * @tr : the trace array to know if ring buffer is enabled
1289 *
1290 * Shows real state of the ring buffer if it is enabled or not.
1291 */
1292bool tracer_tracing_is_on(struct trace_array *tr)
1293{
1294 if (tr->trace_buffer.buffer)
1295 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1296 return !tr->buffer_disabled;
1297}
1298
1299/**
1300 * tracing_is_on - show state of ring buffers enabled
1301 */
1302int tracing_is_on(void)
1303{
1304 return tracer_tracing_is_on(&global_trace);
1305}
1306EXPORT_SYMBOL_GPL(tracing_is_on);
1307
1308static int __init set_buf_size(char *str)
1309{
1310 unsigned long buf_size;
1311
1312 if (!str)
1313 return 0;
1314 buf_size = memparse(str, &str);
1315 /*
1316 * nr_entries can not be zero and the startup
1317 * tests require some buffer space. Therefore
1318 * ensure we have at least 4096 bytes of buffer.
1319 */
1320 trace_buf_size = max(4096UL, buf_size);
1321 return 1;
1322}
1323__setup("trace_buf_size=", set_buf_size);
1324
1325static int __init set_tracing_thresh(char *str)
1326{
1327 unsigned long threshold;
1328 int ret;
1329
1330 if (!str)
1331 return 0;
1332 ret = kstrtoul(str, 0, &threshold);
1333 if (ret < 0)
1334 return 0;
1335 tracing_thresh = threshold * 1000;
1336 return 1;
1337}
1338__setup("tracing_thresh=", set_tracing_thresh);
1339
1340unsigned long nsecs_to_usecs(unsigned long nsecs)
1341{
1342 return nsecs / 1000;
1343}
1344
1345/*
1346 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1347 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1348 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1349 * of strings in the order that the evals (enum) were defined.
1350 */
1351#undef C
1352#define C(a, b) b
1353
/* These must match the bit positions in trace_iterator_flags */
1355static const char *trace_options[] = {
1356 TRACE_FLAGS
1357 NULL
1358};
1359
1360static struct {
1361 u64 (*func)(void);
1362 const char *name;
1363 int in_ns; /* is this clock in nanoseconds? */
1364} trace_clocks[] = {
1365 { trace_clock_local, "local", 1 },
1366 { trace_clock_global, "global", 1 },
1367 { trace_clock_counter, "counter", 0 },
1368 { trace_clock_jiffies, "uptime", 0 },
1369 { trace_clock, "perf", 1 },
1370 { ktime_get_mono_fast_ns, "mono", 1 },
1371 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1372 { ktime_get_boot_fast_ns, "boot", 1 },
1373 ARCH_TRACE_CLOCKS
1374};
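
/*
 * The "name" strings above are what userspace selects through the
 * trace_clock file, e.g.
 *
 *   echo mono > /sys/kernel/tracing/trace_clock
 *
 * (path assumes the usual tracefs mount point); in_ns records whether the
 * clock's values are nanoseconds, which trace_clock_in_ns() below reports.
 */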
1375
1376bool trace_clock_in_ns(struct trace_array *tr)
1377{
1378 if (trace_clocks[tr->clock_id].in_ns)
1379 return true;
1380
1381 return false;
1382}
1383
1384/*
1385 * trace_parser_get_init - gets the buffer for trace parser
1386 */
1387int trace_parser_get_init(struct trace_parser *parser, int size)
1388{
1389 memset(parser, 0, sizeof(*parser));
1390
1391 parser->buffer = kmalloc(size, GFP_KERNEL);
1392 if (!parser->buffer)
1393 return 1;
1394
1395 parser->size = size;
1396 return 0;
1397}
1398
1399/*
1400 * trace_parser_put - frees the buffer for trace parser
1401 */
1402void trace_parser_put(struct trace_parser *parser)
1403{
1404 kfree(parser->buffer);
1405 parser->buffer = NULL;
1406}
1407
1408/*
1409 * trace_get_user - reads the user input string separated by space
1410 * (matched by isspace(ch))
1411 *
1412 * For each string found the 'struct trace_parser' is updated,
1413 * and the function returns.
1414 *
1415 * Returns number of bytes read.
1416 *
1417 * See kernel/trace/trace.h for 'struct trace_parser' details.
1418 */
1419int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1420 size_t cnt, loff_t *ppos)
1421{
1422 char ch;
1423 size_t read = 0;
1424 ssize_t ret;
1425
1426 if (!*ppos)
1427 trace_parser_clear(parser);
1428
1429 ret = get_user(ch, ubuf++);
1430 if (ret)
1431 goto out;
1432
1433 read++;
1434 cnt--;
1435
1436 /*
1437 * The parser is not finished with the last write,
1438 * continue reading the user input without skipping spaces.
1439 */
1440 if (!parser->cont) {
1441 /* skip white space */
1442 while (cnt && isspace(ch)) {
1443 ret = get_user(ch, ubuf++);
1444 if (ret)
1445 goto out;
1446 read++;
1447 cnt--;
1448 }
1449
1450 parser->idx = 0;
1451
1452 /* only spaces were written */
1453 if (isspace(ch) || !ch) {
1454 *ppos += read;
1455 ret = read;
1456 goto out;
1457 }
1458 }
1459
1460 /* read the non-space input */
1461 while (cnt && !isspace(ch) && ch) {
1462 if (parser->idx < parser->size - 1)
1463 parser->buffer[parser->idx++] = ch;
1464 else {
1465 ret = -EINVAL;
1466 goto out;
1467 }
1468 ret = get_user(ch, ubuf++);
1469 if (ret)
1470 goto out;
1471 read++;
1472 cnt--;
1473 }
1474
	/* We either finished reading the input or we have to wait for another call. */
1476 if (isspace(ch) || !ch) {
1477 parser->buffer[parser->idx] = 0;
1478 parser->cont = false;
1479 } else if (parser->idx < parser->size - 1) {
1480 parser->cont = true;
1481 parser->buffer[parser->idx++] = ch;
1482 /* Make sure the parsed string always terminates with '\0'. */
1483 parser->buffer[parser->idx] = 0;
1484 } else {
1485 ret = -EINVAL;
1486 goto out;
1487 }
1488
1489 *ppos += read;
1490 ret = read;
1491
1492out:
1493 return ret;
1494}
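
/*
 * Worked example: a write of "foo bar" is consumed in two calls from the
 * same writer. The first call copies "foo", eats the separating space and
 * returns with parser->cont == false. The second call copies "bar" but,
 * because the input ended without a separator, sets parser->cont == true
 * so that a subsequent write can continue the same token.
 */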
1495
1496/* TODO add a seq_buf_to_buffer() */
1497static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1498{
1499 int len;
1500
1501 if (trace_seq_used(s) <= s->seq.readpos)
1502 return -EBUSY;
1503
1504 len = trace_seq_used(s) - s->seq.readpos;
1505 if (cnt > len)
1506 cnt = len;
1507 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1508
1509 s->seq.readpos += cnt;
1510 return cnt;
1511}
1512
1513unsigned long __read_mostly tracing_thresh;
1514
1515#ifdef CONFIG_TRACER_MAX_TRACE
1516/*
1517 * Copy the new maximum trace into the separate maximum-trace
1518 * structure. (this way the maximum trace is permanently saved,
1519 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1520 */
1521static void
1522__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1523{
1524 struct trace_buffer *trace_buf = &tr->trace_buffer;
1525 struct trace_buffer *max_buf = &tr->max_buffer;
1526 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1527 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1528
1529 max_buf->cpu = cpu;
1530 max_buf->time_start = data->preempt_timestamp;
1531
1532 max_data->saved_latency = tr->max_latency;
1533 max_data->critical_start = data->critical_start;
1534 max_data->critical_end = data->critical_end;
1535
1536 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1537 max_data->pid = tsk->pid;
1538 /*
1539 * If tsk == current, then use current_uid(), as that does not use
1540 * RCU. The irq tracer can be called out of RCU scope.
1541 */
1542 if (tsk == current)
1543 max_data->uid = current_uid();
1544 else
1545 max_data->uid = task_uid(tsk);
1546
1547 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1548 max_data->policy = tsk->policy;
1549 max_data->rt_priority = tsk->rt_priority;
1550
1551 /* record this tasks comm */
1552 tracing_record_cmdline(tsk);
1553}
1554
1555/**
1556 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1557 * @tr: tracer
1558 * @tsk: the task with the latency
1559 * @cpu: The cpu that initiated the trace.
1560 * @cond_data: User data associated with a conditional snapshot
1561 *
1562 * Flip the buffers between the @tr and the max_tr and record information
1563 * about which task was the cause of this latency.
1564 */
1565void
1566update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1567 void *cond_data)
1568{
1569 if (tr->stop_count)
1570 return;
1571
1572 WARN_ON_ONCE(!irqs_disabled());
1573
1574 if (!tr->allocated_snapshot) {
1575 /* Only the nop tracer should hit this when disabling */
1576 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1577 return;
1578 }
1579
1580 arch_spin_lock(&tr->max_lock);
1581
1582 /* Inherit the recordable setting from trace_buffer */
1583 if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1584 ring_buffer_record_on(tr->max_buffer.buffer);
1585 else
1586 ring_buffer_record_off(tr->max_buffer.buffer);
1587
1588#ifdef CONFIG_TRACER_SNAPSHOT
1589 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1590 goto out_unlock;
1591#endif
1592 swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1593
1594 __update_max_tr(tr, tsk, cpu);
1595
1596 out_unlock:
1597 arch_spin_unlock(&tr->max_lock);
1598}
1599
1600/**
1601 * update_max_tr_single - only copy one trace over, and reset the rest
1602 * @tr: tracer
1603 * @tsk: task with the latency
1604 * @cpu: the cpu of the buffer to copy.
1605 *
1606 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1607 */
1608void
1609update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1610{
1611 int ret;
1612
1613 if (tr->stop_count)
1614 return;
1615
1616 WARN_ON_ONCE(!irqs_disabled());
1617 if (!tr->allocated_snapshot) {
1618 /* Only the nop tracer should hit this when disabling */
1619 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1620 return;
1621 }
1622
1623 arch_spin_lock(&tr->max_lock);
1624
1625 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1626
1627 if (ret == -EBUSY) {
1628 /*
1629 * We failed to swap the buffer due to a commit taking
1630 * place on this CPU. We fail to record, but we reset
1631 * the max trace buffer (no one writes directly to it)
1632 * and flag that it failed.
1633 */
1634 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1635 "Failed to swap buffers due to commit in progress\n");
1636 }
1637
1638 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1639
1640 __update_max_tr(tr, tsk, cpu);
1641 arch_spin_unlock(&tr->max_lock);
1642}
1643#endif /* CONFIG_TRACER_MAX_TRACE */
1644
1645static int wait_on_pipe(struct trace_iterator *iter, int full)
1646{
1647 /* Iterators are static, they should be filled or empty */
1648 if (trace_buffer_iter(iter, iter->cpu_file))
1649 return 0;
1650
1651 return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1652 full);
1653}
1654
1655#ifdef CONFIG_FTRACE_STARTUP_TEST
1656static bool selftests_can_run;
1657
1658struct trace_selftests {
1659 struct list_head list;
1660 struct tracer *type;
1661};
1662
1663static LIST_HEAD(postponed_selftests);
1664
1665static int save_selftest(struct tracer *type)
1666{
1667 struct trace_selftests *selftest;
1668
1669 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1670 if (!selftest)
1671 return -ENOMEM;
1672
1673 selftest->type = type;
1674 list_add(&selftest->list, &postponed_selftests);
1675 return 0;
1676}
1677
1678static int run_tracer_selftest(struct tracer *type)
1679{
1680 struct trace_array *tr = &global_trace;
1681 struct tracer *saved_tracer = tr->current_trace;
1682 int ret;
1683
1684 if (!type->selftest || tracing_selftest_disabled)
1685 return 0;
1686
1687 /*
1688 * If a tracer registers early in boot up (before scheduling is
1689 * initialized and such), then do not run its selftests yet.
1690 * Instead, run it a little later in the boot process.
1691 */
1692 if (!selftests_can_run)
1693 return save_selftest(type);
1694
1695 /*
1696 * Run a selftest on this tracer.
1697 * Here we reset the trace buffer, and set the current
1698 * tracer to be this tracer. The tracer can then run some
1699 * internal tracing to verify that everything is in order.
1700 * If we fail, we do not register this tracer.
1701 */
1702 tracing_reset_online_cpus(&tr->trace_buffer);
1703
1704 tr->current_trace = type;
1705
1706#ifdef CONFIG_TRACER_MAX_TRACE
1707 if (type->use_max_tr) {
1708 /* If we expanded the buffers, make sure the max is expanded too */
1709 if (ring_buffer_expanded)
1710 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1711 RING_BUFFER_ALL_CPUS);
1712 tr->allocated_snapshot = true;
1713 }
1714#endif
1715
1716 /* the test is responsible for initializing and enabling */
1717 pr_info("Testing tracer %s: ", type->name);
1718 ret = type->selftest(type, tr);
1719 /* the test is responsible for resetting too */
1720 tr->current_trace = saved_tracer;
1721 if (ret) {
1722 printk(KERN_CONT "FAILED!\n");
1723 /* Add the warning after printing 'FAILED' */
1724 WARN_ON(1);
1725 return -1;
1726 }
1727 /* Only reset on passing, to avoid touching corrupted buffers */
1728 tracing_reset_online_cpus(&tr->trace_buffer);
1729
1730#ifdef CONFIG_TRACER_MAX_TRACE
1731 if (type->use_max_tr) {
1732 tr->allocated_snapshot = false;
1733
1734 /* Shrink the max buffer again */
1735 if (ring_buffer_expanded)
1736 ring_buffer_resize(tr->max_buffer.buffer, 1,
1737 RING_BUFFER_ALL_CPUS);
1738 }
1739#endif
1740
1741 printk(KERN_CONT "PASSED\n");
1742 return 0;
1743}
1744
1745static __init int init_trace_selftests(void)
1746{
1747 struct trace_selftests *p, *n;
1748 struct tracer *t, **last;
1749 int ret;
1750
1751 selftests_can_run = true;
1752
1753 mutex_lock(&trace_types_lock);
1754
1755 if (list_empty(&postponed_selftests))
1756 goto out;
1757
1758 pr_info("Running postponed tracer tests:\n");
1759
1760 tracing_selftest_running = true;
1761 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
		/*
		 * This loop can take minutes when sanitizers are enabled, so
		 * let's make sure we allow RCU processing.
		 */
1765 cond_resched();
1766 ret = run_tracer_selftest(p->type);
1767 /* If the test fails, then warn and remove from available_tracers */
1768 if (ret < 0) {
1769 WARN(1, "tracer: %s failed selftest, disabling\n",
1770 p->type->name);
1771 last = &trace_types;
1772 for (t = trace_types; t; t = t->next) {
1773 if (t == p->type) {
1774 *last = t->next;
1775 break;
1776 }
1777 last = &t->next;
1778 }
1779 }
1780 list_del(&p->list);
1781 kfree(p);
1782 }
1783 tracing_selftest_running = false;
1784
1785 out:
1786 mutex_unlock(&trace_types_lock);
1787
1788 return 0;
1789}
1790core_initcall(init_trace_selftests);
1791#else
1792static inline int run_tracer_selftest(struct tracer *type)
1793{
1794 return 0;
1795}
1796#endif /* CONFIG_FTRACE_STARTUP_TEST */
1797
1798static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1799
1800static void __init apply_trace_boot_options(void);
1801
1802/**
1803 * register_tracer - register a tracer with the ftrace system.
1804 * @type: the plugin for the tracer
1805 *
1806 * Register a new plugin tracer.
1807 */
1808int __init register_tracer(struct tracer *type)
1809{
1810 struct tracer *t;
1811 int ret = 0;
1812
1813 if (!type->name) {
1814 pr_info("Tracer must have a name\n");
1815 return -1;
1816 }
1817
1818 if (strlen(type->name) >= MAX_TRACER_SIZE) {
1819 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1820 return -1;
1821 }
1822
1823 if (security_locked_down(LOCKDOWN_TRACEFS)) {
1824 pr_warning("Can not register tracer %s due to lockdown\n",
1825 type->name);
1826 return -EPERM;
1827 }
1828
1829 mutex_lock(&trace_types_lock);
1830
1831 tracing_selftest_running = true;
1832
1833 for (t = trace_types; t; t = t->next) {
1834 if (strcmp(type->name, t->name) == 0) {
1835 /* already found */
1836 pr_info("Tracer %s already registered\n",
1837 type->name);
1838 ret = -1;
1839 goto out;
1840 }
1841 }
1842
1843 if (!type->set_flag)
1844 type->set_flag = &dummy_set_flag;
1845 if (!type->flags) {
		/* Allocate a dummy tracer_flags */
1847 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1848 if (!type->flags) {
1849 ret = -ENOMEM;
1850 goto out;
1851 }
1852 type->flags->val = 0;
1853 type->flags->opts = dummy_tracer_opt;
1854 } else
1855 if (!type->flags->opts)
1856 type->flags->opts = dummy_tracer_opt;
1857
1858 /* store the tracer for __set_tracer_option */
1859 type->flags->trace = type;
1860
1861 ret = run_tracer_selftest(type);
1862 if (ret < 0)
1863 goto out;
1864
1865 type->next = trace_types;
1866 trace_types = type;
1867 add_tracer_options(&global_trace, type);
1868
1869 out:
1870 tracing_selftest_running = false;
1871 mutex_unlock(&trace_types_lock);
1872
1873 if (ret || !default_bootup_tracer)
1874 goto out_unlock;
1875
1876 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1877 goto out_unlock;
1878
1879 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1880 /* Do we want this tracer to start on bootup? */
1881 tracing_set_tracer(&global_trace, type->name);
1882 default_bootup_tracer = NULL;
1883
1884 apply_trace_boot_options();
1885
1886 /* disable other selftests, since this will break it. */
1887 tracing_selftest_disabled = true;
1888#ifdef CONFIG_FTRACE_STARTUP_TEST
1889 printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1890 type->name);
1891#endif
1892
1893 out_unlock:
1894 return ret;
1895}
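
/*
 * Sketch (not compiled) of the minimal plugin side of register_tracer();
 * real tracers live in their own files and fill in many more callbacks.
 * The example_* names are hypothetical.
 */
#if 0
static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_tracer_init,
	.reset	= example_tracer_reset,
};

static __init int init_example_tracer(void)
{
	return register_tracer(&example_tracer);
}
core_initcall(init_example_tracer);
#endif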
1896
1897static void tracing_reset_cpu(struct trace_buffer *buf, int cpu)
1898{
1899 struct ring_buffer *buffer = buf->buffer;
1900
1901 if (!buffer)
1902 return;
1903
1904 ring_buffer_record_disable(buffer);
1905
1906 /* Make sure all commits have finished */
1907 synchronize_rcu();
1908 ring_buffer_reset_cpu(buffer, cpu);
1909
1910 ring_buffer_record_enable(buffer);
1911}
1912
1913void tracing_reset_online_cpus(struct trace_buffer *buf)
1914{
1915 struct ring_buffer *buffer = buf->buffer;
1916 int cpu;
1917
1918 if (!buffer)
1919 return;
1920
1921 ring_buffer_record_disable(buffer);
1922
1923 /* Make sure all commits have finished */
1924 synchronize_rcu();
1925
1926 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1927
1928 for_each_online_cpu(cpu)
1929 ring_buffer_reset_cpu(buffer, cpu);
1930
1931 ring_buffer_record_enable(buffer);
1932}
1933
1934/* Must have trace_types_lock held */
1935void tracing_reset_all_online_cpus_unlocked(void)
1936{
1937 struct trace_array *tr;
1938
1939 lockdep_assert_held(&trace_types_lock);
1940
1941 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1942 if (!tr->clear_trace)
1943 continue;
1944 tr->clear_trace = false;
1945 tracing_reset_online_cpus(&tr->trace_buffer);
1946#ifdef CONFIG_TRACER_MAX_TRACE
1947 tracing_reset_online_cpus(&tr->max_buffer);
1948#endif
1949 }
1950}
1951
1952void tracing_reset_all_online_cpus(void)
1953{
1954 mutex_lock(&trace_types_lock);
1955 tracing_reset_all_online_cpus_unlocked();
1956 mutex_unlock(&trace_types_lock);
1957}
1958
1959/*
1960 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
1961 * is the tgid last observed corresponding to pid=i.
1962 */
1963static int *tgid_map;
1964
1965/* The maximum valid index into tgid_map. */
1966static size_t tgid_map_max;
1967
1968#define SAVED_CMDLINES_DEFAULT 128
1969#define NO_CMDLINE_MAP UINT_MAX
1970/*
1971 * Preemption must be disabled before acquiring trace_cmdline_lock.
1972 * The various trace_arrays' max_lock must be acquired in a context
1973 * where interrupt is disabled.
1974 */
1975static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1976struct saved_cmdlines_buffer {
1977 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1978 unsigned *map_cmdline_to_pid;
1979 unsigned cmdline_num;
1980 int cmdline_idx;
1981 char saved_cmdlines[];
1982};
1983static struct saved_cmdlines_buffer *savedcmd;
1984
1985/* Holds the size of a cmdline and pid element */
1986#define SAVED_CMDLINE_MAP_ELEMENT_SIZE(s) \
1987 (TASK_COMM_LEN + sizeof((s)->map_cmdline_to_pid[0]))
1988
1989static inline char *get_saved_cmdlines(int idx)
1990{
1991 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1992}
1993
1994static inline void set_cmdline(int idx, const char *cmdline)
1995{
1996 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1997}
1998
1999static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
2000{
2001 int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
2002
2003 kmemleak_free(s);
2004 free_pages((unsigned long)s, order);
2005}
2006
2007static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
2008{
2009 struct saved_cmdlines_buffer *s;
2010 struct page *page;
2011 int orig_size, size;
2012 int order;
2013
2014 /* Figure out how much is needed to hold the given number of cmdlines */
2015 orig_size = sizeof(*s) + val * SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
2016 order = get_order(orig_size);
2017 size = 1 << (order + PAGE_SHIFT);
2018 page = alloc_pages(GFP_KERNEL, order);
2019 if (!page)
2020 return NULL;
2021
2022 s = page_address(page);
2023 kmemleak_alloc(s, size, 1, GFP_KERNEL);
2024 memset(s, 0, sizeof(*s));
2025
2026 /* Round up to actual allocation */
2027 val = (size - sizeof(*s)) / SAVED_CMDLINE_MAP_ELEMENT_SIZE(s);
2028 s->cmdline_num = val;
2029
2030 /* Place map_cmdline_to_pid array right after saved_cmdlines */
2031 s->map_cmdline_to_pid = (unsigned *)&s->saved_cmdlines[val * TASK_COMM_LEN];
2032
2033 s->cmdline_idx = 0;
2034 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2035 sizeof(s->map_pid_to_cmdline));
2036 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2037 val * sizeof(*s->map_cmdline_to_pid));
2038
2039 return s;
2040}
2041
2042static int trace_create_savedcmd(void)
2043{
2044 savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
2045
2046 return savedcmd ? 0 : -ENOMEM;
2047}
2048
2049int is_tracing_stopped(void)
2050{
2051 return global_trace.stop_count;
2052}
2053
2054/**
2055 * tracing_start - quick start of the tracer
2056 *
2057 * If tracing is enabled but was stopped by tracing_stop,
2058 * this will start the tracer back up.
2059 */
2060void tracing_start(void)
2061{
2062 struct ring_buffer *buffer;
2063 unsigned long flags;
2064
2065 if (tracing_disabled)
2066 return;
2067
2068 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2069 if (--global_trace.stop_count) {
2070 if (global_trace.stop_count < 0) {
2071 /* Someone screwed up their debugging */
2072 WARN_ON_ONCE(1);
2073 global_trace.stop_count = 0;
2074 }
2075 goto out;
2076 }
2077
2078 /* Prevent the buffers from switching */
2079 arch_spin_lock(&global_trace.max_lock);
2080
2081 buffer = global_trace.trace_buffer.buffer;
2082 if (buffer)
2083 ring_buffer_record_enable(buffer);
2084
2085#ifdef CONFIG_TRACER_MAX_TRACE
2086 buffer = global_trace.max_buffer.buffer;
2087 if (buffer)
2088 ring_buffer_record_enable(buffer);
2089#endif
2090
2091 arch_spin_unlock(&global_trace.max_lock);
2092
2093 out:
2094 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2095}
2096
2097static void tracing_start_tr(struct trace_array *tr)
2098{
2099 struct ring_buffer *buffer;
2100 unsigned long flags;
2101
2102 if (tracing_disabled)
2103 return;
2104
2105 /* If global, we need to also start the max tracer */
2106 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2107 return tracing_start();
2108
2109 raw_spin_lock_irqsave(&tr->start_lock, flags);
2110
2111 if (--tr->stop_count) {
2112 if (tr->stop_count < 0) {
2113 /* Someone screwed up their debugging */
2114 WARN_ON_ONCE(1);
2115 tr->stop_count = 0;
2116 }
2117 goto out;
2118 }
2119
2120 buffer = tr->trace_buffer.buffer;
2121 if (buffer)
2122 ring_buffer_record_enable(buffer);
2123
2124 out:
2125 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2126}
2127
2128/**
2129 * tracing_stop - quick stop of the tracer
2130 *
2131 * Lightweight way to stop tracing. Use in conjunction with
2132 * tracing_start.
2133 */
2134void tracing_stop(void)
2135{
2136 struct ring_buffer *buffer;
2137 unsigned long flags;
2138
2139 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2140 if (global_trace.stop_count++)
2141 goto out;
2142
2143 /* Prevent the buffers from switching */
2144 arch_spin_lock(&global_trace.max_lock);
2145
2146 buffer = global_trace.trace_buffer.buffer;
2147 if (buffer)
2148 ring_buffer_record_disable(buffer);
2149
2150#ifdef CONFIG_TRACER_MAX_TRACE
2151 buffer = global_trace.max_buffer.buffer;
2152 if (buffer)
2153 ring_buffer_record_disable(buffer);
2154#endif
2155
2156 arch_spin_unlock(&global_trace.max_lock);
2157
2158 out:
2159 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2160}
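
/*
 * Added usage sketch (not part of the original file): a debug path can
 * freeze the buffers around an inspection window and resume afterwards.
 * "my_inspect_buffers" is a hypothetical helper.
 *
 *	tracing_stop();
 *	my_inspect_buffers();	// read the now-quiescent ring buffers
 *	tracing_start();
 */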
2161
2162static void tracing_stop_tr(struct trace_array *tr)
2163{
2164 struct ring_buffer *buffer;
2165 unsigned long flags;
2166
2167 /* If global, we need to also stop the max tracer */
2168 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2169 return tracing_stop();
2170
2171 raw_spin_lock_irqsave(&tr->start_lock, flags);
2172 if (tr->stop_count++)
2173 goto out;
2174
2175 buffer = tr->trace_buffer.buffer;
2176 if (buffer)
2177 ring_buffer_record_disable(buffer);
2178
2179 out:
2180 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2181}
2182
2183static int trace_save_cmdline(struct task_struct *tsk)
2184{
2185 unsigned tpid, idx;
2186
2187 /* treat recording of idle task as a success */
2188 if (!tsk->pid)
2189 return 1;
2190
2191 tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2192
2193 /*
2194 * It's not the end of the world if we don't get
2195 * the lock, but we also don't want to spin
2196 * nor do we want to disable interrupts,
2197 * so if we miss here, then better luck next time.
2198 *
2199	 * This is called from within the scheduler and wakeup paths, so interrupts
2200	 * had better be disabled and the run queue lock held.
2201 */
2202 if (!arch_spin_trylock(&trace_cmdline_lock))
2203 return 0;
2204
2205 idx = savedcmd->map_pid_to_cmdline[tpid];
2206 if (idx == NO_CMDLINE_MAP) {
2207 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2208
2209 savedcmd->map_pid_to_cmdline[tpid] = idx;
2210 savedcmd->cmdline_idx = idx;
2211 }
2212
2213 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2214 set_cmdline(idx, tsk->comm);
2215
2216 arch_spin_unlock(&trace_cmdline_lock);
2217
2218 return 1;
2219}
2220
2221static void __trace_find_cmdline(int pid, char comm[])
2222{
2223 unsigned map;
2224 int tpid;
2225
2226 if (!pid) {
2227 strcpy(comm, "<idle>");
2228 return;
2229 }
2230
2231 if (WARN_ON_ONCE(pid < 0)) {
2232 strcpy(comm, "<XXX>");
2233 return;
2234 }
2235
2236 tpid = pid & (PID_MAX_DEFAULT - 1);
2237 map = savedcmd->map_pid_to_cmdline[tpid];
2238 if (map != NO_CMDLINE_MAP) {
2239 tpid = savedcmd->map_cmdline_to_pid[map];
2240 if (tpid == pid) {
2241 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2242 return;
2243 }
2244 }
2245 strcpy(comm, "<...>");
2246}
2247
2248void trace_find_cmdline(int pid, char comm[])
2249{
2250 preempt_disable();
2251 arch_spin_lock(&trace_cmdline_lock);
2252
2253 __trace_find_cmdline(pid, comm);
2254
2255 arch_spin_unlock(&trace_cmdline_lock);
2256 preempt_enable();
2257}
2258
2259static int *trace_find_tgid_ptr(int pid)
2260{
2261 /*
2262 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2263 * if we observe a non-NULL tgid_map then we also observe the correct
2264 * tgid_map_max.
2265 */
2266 int *map = smp_load_acquire(&tgid_map);
2267
2268 if (unlikely(!map || pid > tgid_map_max))
2269 return NULL;
2270
2271 return &map[pid];
2272}
2273
2274int trace_find_tgid(int pid)
2275{
2276 int *ptr = trace_find_tgid_ptr(pid);
2277
2278 return ptr ? *ptr : 0;
2279}
2280
2281static int trace_save_tgid(struct task_struct *tsk)
2282{
2283 int *ptr;
2284
2285 /* treat recording of idle task as a success */
2286 if (!tsk->pid)
2287 return 1;
2288
2289 ptr = trace_find_tgid_ptr(tsk->pid);
2290 if (!ptr)
2291 return 0;
2292
2293 *ptr = tsk->tgid;
2294 return 1;
2295}
2296
2297static bool tracing_record_taskinfo_skip(int flags)
2298{
2299 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2300 return true;
2301 if (!__this_cpu_read(trace_taskinfo_save))
2302 return true;
2303 return false;
2304}
2305
2306/**
2307 * tracing_record_taskinfo - record the task info of a task
2308 *
2309 * @task: task to record
2310 * @flags: TRACE_RECORD_CMDLINE for recording comm
2311 * TRACE_RECORD_TGID for recording tgid
2312 */
2313void tracing_record_taskinfo(struct task_struct *task, int flags)
2314{
2315 bool done;
2316
2317 if (tracing_record_taskinfo_skip(flags))
2318 return;
2319
2320 /*
2321 * Record as much task information as possible. If some fail, continue
2322 * to try to record the others.
2323 */
2324 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2325 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2326
2327 /* If recording any information failed, retry again soon. */
2328 if (!done)
2329 return;
2330
2331 __this_cpu_write(trace_taskinfo_save, false);
2332}
2333
2334/**
2335 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2336 *
2337 * @prev: previous task during sched_switch
2338 * @next: next task during sched_switch
2339 * @flags: TRACE_RECORD_CMDLINE for recording comm
2340 * TRACE_RECORD_TGID for recording tgid
2341 */
2342void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2343 struct task_struct *next, int flags)
2344{
2345 bool done;
2346
2347 if (tracing_record_taskinfo_skip(flags))
2348 return;
2349
2350 /*
2351 * Record as much task information as possible. If some fail, continue
2352 * to try to record the others.
2353 */
2354 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2355 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2356 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2357 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2358
2359 /* If recording any information failed, retry again soon. */
2360 if (!done)
2361 return;
2362
2363 __this_cpu_write(trace_taskinfo_save, false);
2364}
2365
2366/* Helpers to record a specific task information */
2367void tracing_record_cmdline(struct task_struct *task)
2368{
2369 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2370}
2371
2372void tracing_record_tgid(struct task_struct *task)
2373{
2374 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2375}
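
/*
 * Added usage sketch (not part of the original file): a sched_switch probe
 * typically records both tasks in a single call, e.g.:
 *
 *	tracing_record_taskinfo_sched_switch(prev, next,
 *				TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID);
 */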
2376
2377/*
2378 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2379 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2380 * simplifies those functions and keeps them in sync.
2381 */
2382enum print_line_t trace_handle_return(struct trace_seq *s)
2383{
2384 return trace_seq_has_overflowed(s) ?
2385 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2386}
2387EXPORT_SYMBOL_GPL(trace_handle_return);
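
/*
 * Added sketch (not part of the original file): the handler pattern
 * trace_handle_return() is meant for. "my_event_trace" is hypothetical;
 * real output handlers are normally generated by the TRACE_EVENT() macros.
 */
static enum print_line_t my_event_trace(struct trace_iterator *iter,
					int flags, struct trace_event *event)
{
	struct trace_seq *s = &iter->seq;

	trace_seq_printf(s, "my_event: cpu=%d ts=%llu\n", iter->cpu, iter->ts);

	/* Collapse the overflow check into the required return value */
	return trace_handle_return(s);
}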
2388
2389void
2390tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2391 unsigned long flags, int pc)
2392{
2393 struct task_struct *tsk = current;
2394
2395 entry->preempt_count = pc & 0xff;
2396 entry->pid = (tsk) ? tsk->pid : 0;
2397 entry->type = type;
2398 entry->flags =
2399#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2400 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2401#else
2402 TRACE_FLAG_IRQS_NOSUPPORT |
2403#endif
2404 ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) |
2405 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2406 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2407 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2408 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2409}
2410EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2411
2412struct ring_buffer_event *
2413trace_buffer_lock_reserve(struct ring_buffer *buffer,
2414 int type,
2415 unsigned long len,
2416 unsigned long flags, int pc)
2417{
2418 return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2419}
2420
2421DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2422DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2423static int trace_buffered_event_ref;
2424
2425/**
2426 * trace_buffered_event_enable - enable buffering events
2427 *
2428 * When events are being filtered, it is quicker to use a temporary
2429 * buffer to write the event data into if there's a likely chance
2430 * that it will not be committed. The discard of the ring buffer
2431 * is not as fast as committing, and is much slower than copying
2432 * a commit.
2433 *
2434 * When an event is to be filtered, per-CPU buffers are allocated to
2435 * write the event data into. If the event is then filtered and discarded,
2436 * it is simply dropped; otherwise, the entire data is committed
2437 * in one shot.
2438 */
2439void trace_buffered_event_enable(void)
2440{
2441 struct ring_buffer_event *event;
2442 struct page *page;
2443 int cpu;
2444
2445 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2446
2447 if (trace_buffered_event_ref++)
2448 return;
2449
2450 for_each_tracing_cpu(cpu) {
2451 page = alloc_pages_node(cpu_to_node(cpu),
2452 GFP_KERNEL | __GFP_NORETRY, 0);
2453 /* This is just an optimization and can handle failures */
2454 if (!page) {
2455 pr_err("Failed to allocate event buffer\n");
2456 break;
2457 }
2458
2459 event = page_address(page);
2460 memset(event, 0, sizeof(*event));
2461
2462 per_cpu(trace_buffered_event, cpu) = event;
2463
2464 preempt_disable();
2465 if (cpu == smp_processor_id() &&
2466 this_cpu_read(trace_buffered_event) !=
2467 per_cpu(trace_buffered_event, cpu))
2468 WARN_ON_ONCE(1);
2469 preempt_enable();
2470 }
2471}
2472
2473static void enable_trace_buffered_event(void *data)
2474{
2475 /* Probably not needed, but do it anyway */
2476 smp_rmb();
2477 this_cpu_dec(trace_buffered_event_cnt);
2478}
2479
2480static void disable_trace_buffered_event(void *data)
2481{
2482 this_cpu_inc(trace_buffered_event_cnt);
2483}
2484
2485/**
2486 * trace_buffered_event_disable - disable buffering events
2487 *
2488 * When a filter is removed, it is faster to not use the buffered
2489 * events, and to commit directly into the ring buffer. Free up
2490 * the temp buffers when there are no more users. This requires
2491 * special synchronization with current events.
2492 */
2493void trace_buffered_event_disable(void)
2494{
2495 int cpu;
2496
2497 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2498
2499 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2500 return;
2501
2502 if (--trace_buffered_event_ref)
2503 return;
2504
2505 /* For each CPU, set the buffer as used. */
2506 on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2507 NULL, true);
2508
2509 /* Wait for all current users to finish */
2510 synchronize_rcu();
2511
2512 for_each_tracing_cpu(cpu) {
2513 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2514 per_cpu(trace_buffered_event, cpu) = NULL;
2515 }
2516
2517 /*
2518 * Wait for all CPUs that potentially started checking if they can use
2519 * their event buffer only after the previous synchronize_rcu() call and
2520 * they still read a valid pointer from trace_buffered_event. It must be
2521 * ensured they don't see cleared trace_buffered_event_cnt else they
2522 * could wrongly decide to use the pointed-to buffer which is now freed.
2523 */
2524 synchronize_rcu();
2525
2526 /* For each CPU, relinquish the buffer */
2527 on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2528 true);
2529}
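
/*
 * Added sketch (not part of the original file): the intended call pattern
 * around a filter's lifetime. The enable/disable pair must be called under
 * event_mutex; "my_attach_event_filter"/"my_detach_event_filter" are
 * hypothetical wrappers and the filter install/remove steps are elided.
 */
static void my_attach_event_filter(void)
{
	mutex_lock(&event_mutex);
	trace_buffered_event_enable();	/* allocate per-CPU staging pages */
	/* ... install the event filter ... */
	mutex_unlock(&event_mutex);
}

static void my_detach_event_filter(void)
{
	mutex_lock(&event_mutex);
	/* ... remove the event filter ... */
	trace_buffered_event_disable();	/* free the pages once unused */
	mutex_unlock(&event_mutex);
}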
2530
2531static struct ring_buffer *temp_buffer;
2532
2533struct ring_buffer_event *
2534trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2535 struct trace_event_file *trace_file,
2536 int type, unsigned long len,
2537 unsigned long flags, int pc)
2538{
2539 struct ring_buffer_event *entry;
2540 int val;
2541
2542 *current_rb = trace_file->tr->trace_buffer.buffer;
2543
2544 if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2545 (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2546 (entry = this_cpu_read(trace_buffered_event))) {
2547 /* Try to use the per cpu buffer first */
2548 val = this_cpu_inc_return(trace_buffered_event_cnt);
2549 if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2550 trace_event_setup(entry, type, flags, pc);
2551 entry->array[0] = len;
2552 return entry;
2553 }
2554 this_cpu_dec(trace_buffered_event_cnt);
2555 }
2556
2557 entry = __trace_buffer_lock_reserve(*current_rb,
2558 type, len, flags, pc);
2559 /*
2560 * If tracing is off, but we have triggers enabled
2561 * we still need to look at the event data. Use the temp_buffer
2562	 * to store the trace event for the trigger to use. It's recursion
2563	 * safe and will not be recorded anywhere.
2564 */
2565 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2566 *current_rb = temp_buffer;
2567 entry = __trace_buffer_lock_reserve(*current_rb,
2568 type, len, flags, pc);
2569 }
2570 return entry;
2571}
2572EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2573
2574static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2575static DEFINE_MUTEX(tracepoint_printk_mutex);
2576
2577static void output_printk(struct trace_event_buffer *fbuffer)
2578{
2579 struct trace_event_call *event_call;
2580 struct trace_event *event;
2581 unsigned long flags;
2582 struct trace_iterator *iter = tracepoint_print_iter;
2583
2584 /* We should never get here if iter is NULL */
2585 if (WARN_ON_ONCE(!iter))
2586 return;
2587
2588 event_call = fbuffer->trace_file->event_call;
2589 if (!event_call || !event_call->event.funcs ||
2590 !event_call->event.funcs->trace)
2591 return;
2592
2593 event = &fbuffer->trace_file->event_call->event;
2594
2595 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2596 trace_seq_init(&iter->seq);
2597 iter->ent = fbuffer->entry;
2598 event_call->event.funcs->trace(iter, 0, event);
2599 trace_seq_putc(&iter->seq, 0);
2600 printk("%s", iter->seq.buffer);
2601
2602 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2603}
2604
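/*
 * Added note (not part of the original file): this is the handler behind the
 * "kernel.tracepoint_printk" sysctl (its table entry is expected to live in
 * kernel/sysctl.c). It flips tracepoint_printk_key so that
 * trace_event_buffer_commit() below starts or stops mirroring events to printk.
 */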
2605int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2606 void __user *buffer, size_t *lenp,
2607 loff_t *ppos)
2608{
2609 int save_tracepoint_printk;
2610 int ret;
2611
2612 mutex_lock(&tracepoint_printk_mutex);
2613 save_tracepoint_printk = tracepoint_printk;
2614
2615 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2616
2617 /*
2618 * This will force exiting early, as tracepoint_printk
2619	 * is always zero when tracepoint_print_iter is not allocated
2620 */
2621 if (!tracepoint_print_iter)
2622 tracepoint_printk = 0;
2623
2624 if (save_tracepoint_printk == tracepoint_printk)
2625 goto out;
2626
2627 if (tracepoint_printk)
2628 static_key_enable(&tracepoint_printk_key.key);
2629 else
2630 static_key_disable(&tracepoint_printk_key.key);
2631
2632 out:
2633 mutex_unlock(&tracepoint_printk_mutex);
2634
2635 return ret;
2636}
2637
2638void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2639{
2640 if (static_key_false(&tracepoint_printk_key.key))
2641 output_printk(fbuffer);
2642
2643 event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2644 fbuffer->event, fbuffer->entry,
2645 fbuffer->flags, fbuffer->pc);
2646}
2647EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2648
2649/*
2650 * Skip 3:
2651 *
2652 * trace_buffer_unlock_commit_regs()
2653 * trace_event_buffer_commit()
2654 * trace_event_raw_event_xxx()
2655 */
2656# define STACK_SKIP 3
2657
2658void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2659 struct ring_buffer *buffer,
2660 struct ring_buffer_event *event,
2661 unsigned long flags, int pc,
2662 struct pt_regs *regs)
2663{
2664 __buffer_unlock_commit(buffer, event);
2665
2666 /*
2667 * If regs is not set, then skip the necessary functions.
2668 * Note, we can still get here via blktrace, wakeup tracer
2669 * and mmiotrace, but that's ok if they lose a function or
2670 * two. They are not that meaningful.
2671 */
2672 ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2673 ftrace_trace_userstack(tr, buffer, flags, pc);
2674}
2675
2676/*
2677 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2678 */
2679void
2680trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2681 struct ring_buffer_event *event)
2682{
2683 __buffer_unlock_commit(buffer, event);
2684}
2685
2686static void
2687trace_process_export(struct trace_export *export,
2688 struct ring_buffer_event *event)
2689{
2690 struct trace_entry *entry;
2691 unsigned int size = 0;
2692
2693 entry = ring_buffer_event_data(event);
2694 size = ring_buffer_event_length(event);
2695 export->write(export, entry, size);
2696}
2697
2698static DEFINE_MUTEX(ftrace_export_lock);
2699
2700static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2701
2702static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2703
2704static inline void ftrace_exports_enable(void)
2705{
2706 static_branch_enable(&ftrace_exports_enabled);
2707}
2708
2709static inline void ftrace_exports_disable(void)
2710{
2711 static_branch_disable(&ftrace_exports_enabled);
2712}
2713
2714static void ftrace_exports(struct ring_buffer_event *event)
2715{
2716 struct trace_export *export;
2717
2718 preempt_disable_notrace();
2719
2720 export = rcu_dereference_raw_check(ftrace_exports_list);
2721 while (export) {
2722 trace_process_export(export, event);
2723 export = rcu_dereference_raw_check(export->next);
2724 }
2725
2726 preempt_enable_notrace();
2727}
2728
2729static inline void
2730add_trace_export(struct trace_export **list, struct trace_export *export)
2731{
2732 rcu_assign_pointer(export->next, *list);
2733 /*
2734	 * We are adding export to the list, but another
2735	 * CPU might be walking that list. We need to make sure
2736	 * the export->next pointer is valid before another CPU sees
2737	 * the export pointer included in the list.
2738 */
2739 rcu_assign_pointer(*list, export);
2740}
2741
2742static inline int
2743rm_trace_export(struct trace_export **list, struct trace_export *export)
2744{
2745 struct trace_export **p;
2746
2747 for (p = list; *p != NULL; p = &(*p)->next)
2748 if (*p == export)
2749 break;
2750
2751 if (*p != export)
2752 return -1;
2753
2754 rcu_assign_pointer(*p, (*p)->next);
2755
2756 return 0;
2757}
2758
2759static inline void
2760add_ftrace_export(struct trace_export **list, struct trace_export *export)
2761{
2762 if (*list == NULL)
2763 ftrace_exports_enable();
2764
2765 add_trace_export(list, export);
2766}
2767
2768static inline int
2769rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2770{
2771 int ret;
2772
2773 ret = rm_trace_export(list, export);
2774 if (*list == NULL)
2775 ftrace_exports_disable();
2776
2777 return ret;
2778}
2779
2780int register_ftrace_export(struct trace_export *export)
2781{
2782 if (WARN_ON_ONCE(!export->write))
2783 return -1;
2784
2785 mutex_lock(&ftrace_export_lock);
2786
2787 add_ftrace_export(&ftrace_exports_list, export);
2788
2789 mutex_unlock(&ftrace_export_lock);
2790
2791 return 0;
2792}
2793EXPORT_SYMBOL_GPL(register_ftrace_export);
2794
2795int unregister_ftrace_export(struct trace_export *export)
2796{
2797 int ret;
2798
2799 mutex_lock(&ftrace_export_lock);
2800
2801 ret = rm_ftrace_export(&ftrace_exports_list, export);
2802
2803 mutex_unlock(&ftrace_export_lock);
2804
2805 return ret;
2806}
2807EXPORT_SYMBOL_GPL(unregister_ftrace_export);
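
/*
 * Added sketch (not part of the original file): a minimal consumer of the
 * export hooks above. "my_export_write" and the pr_info() body are made up
 * for illustration; the struct and the register/unregister calls are the
 * interface used by this file.
 */
static void my_export_write(struct trace_export *export,
			    const void *entry, unsigned int size)
{
	/* Called from ftrace_exports() for every committed function event */
	pr_info("exported trace entry: %u bytes\n", size);
}

static struct trace_export my_export = {
	.write	= my_export_write,
};

/*
 * Typical lifetime:
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */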
2808
2809void
2810trace_function(struct trace_array *tr,
2811 unsigned long ip, unsigned long parent_ip, unsigned long flags,
2812 int pc)
2813{
2814 struct trace_event_call *call = &event_function;
2815 struct ring_buffer *buffer = tr->trace_buffer.buffer;
2816 struct ring_buffer_event *event;
2817 struct ftrace_entry *entry;
2818
2819 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2820 flags, pc);
2821 if (!event)
2822 return;
2823 entry = ring_buffer_event_data(event);
2824 entry->ip = ip;
2825 entry->parent_ip = parent_ip;
2826
2827 if (!call_filter_check_discard(call, entry, buffer, event)) {
2828 if (static_branch_unlikely(&ftrace_exports_enabled))
2829 ftrace_exports(event);
2830 __buffer_unlock_commit(buffer, event);
2831 }
2832}
2833
2834#ifdef CONFIG_STACKTRACE
2835
2836/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2837#define FTRACE_KSTACK_NESTING 4
2838
2839#define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2840
2841struct ftrace_stack {
2842 unsigned long calls[FTRACE_KSTACK_ENTRIES];
2843};
2844
2845
2846struct ftrace_stacks {
2847 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
2848};
2849
2850static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2851static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2852
2853static void __ftrace_trace_stack(struct ring_buffer *buffer,
2854 unsigned long flags,
2855 int skip, int pc, struct pt_regs *regs)
2856{
2857 struct trace_event_call *call = &event_kernel_stack;
2858 struct ring_buffer_event *event;
2859 unsigned int size, nr_entries;
2860 struct ftrace_stack *fstack;
2861 struct stack_entry *entry;
2862 int stackidx;
2863
2864 /*
2865	 * Add one, for this function and the call to save_stack_trace().
2866 * If regs is set, then these functions will not be in the way.
2867 */
2868#ifndef CONFIG_UNWINDER_ORC
2869 if (!regs)
2870 skip++;
2871#endif
2872
2873 /*
2874 * Since events can happen in NMIs there's no safe way to
2875 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2876 * or NMI comes in, it will just have to use the default
2877 * FTRACE_STACK_SIZE.
2878 */
2879 preempt_disable_notrace();
2880
2881 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2882
2883 /* This should never happen. If it does, yell once and skip */
2884 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2885 goto out;
2886
2887 /*
2888 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2889 * interrupt will either see the value pre increment or post
2890 * increment. If the interrupt happens pre increment it will have
2891 * restored the counter when it returns. We just need a barrier to
2892 * keep gcc from moving things around.
2893 */
2894 barrier();
2895
2896 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2897 size = ARRAY_SIZE(fstack->calls);
2898
2899 if (regs) {
2900 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2901 size, skip);
2902 } else {
2903 nr_entries = stack_trace_save(fstack->calls, size, skip);
2904 }
2905
2906 size = nr_entries * sizeof(unsigned long);
2907 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2908 (sizeof(*entry) - sizeof(entry->caller)) + size,
2909 flags, pc);
2910 if (!event)
2911 goto out;
2912 entry = ring_buffer_event_data(event);
2913
2914 memcpy(&entry->caller, fstack->calls, size);
2915 entry->size = nr_entries;
2916
2917 if (!call_filter_check_discard(call, entry, buffer, event))
2918 __buffer_unlock_commit(buffer, event);
2919
2920 out:
2921 /* Again, don't let gcc optimize things here */
2922 barrier();
2923 __this_cpu_dec(ftrace_stack_reserve);
2924 preempt_enable_notrace();
2925
2926}
2927
2928static inline void ftrace_trace_stack(struct trace_array *tr,
2929 struct ring_buffer *buffer,
2930 unsigned long flags,
2931 int skip, int pc, struct pt_regs *regs)
2932{
2933 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2934 return;
2935
2936 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2937}
2938
2939void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2940 int pc)
2941{
2942 struct ring_buffer *buffer = tr->trace_buffer.buffer;
2943
2944 if (rcu_is_watching()) {
2945 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2946 return;
2947 }
2948
2949 /*
2950 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2951 * but if the above rcu_is_watching() failed, then the NMI
2952 * triggered someplace critical, and rcu_irq_enter() should
2953 * not be called from NMI.
2954 */
2955 if (unlikely(in_nmi()))
2956 return;
2957
2958 rcu_irq_enter_irqson();
2959 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2960 rcu_irq_exit_irqson();
2961}
2962
2963/**
2964 * trace_dump_stack - record a stack back trace in the trace buffer
2965 * @skip: Number of functions to skip (helper handlers)
2966 */
2967void trace_dump_stack(int skip)
2968{
2969 unsigned long flags;
2970
2971 if (tracing_disabled || tracing_selftest_running)
2972 return;
2973
2974 local_save_flags(flags);
2975
2976#ifndef CONFIG_UNWINDER_ORC
2977 /* Skip 1 to skip this function. */
2978 skip++;
2979#endif
2980 __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2981 flags, skip, preempt_count(), NULL);
2982}
2983EXPORT_SYMBOL_GPL(trace_dump_stack);
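
/*
 * Added example (not part of the original file): a debug site can drop its
 * own backtrace into the top-level ring buffer; the argument skips that many
 * additional callers. "my_debug_hook" is hypothetical.
 *
 *	static void my_debug_hook(void)
 *	{
 *		trace_dump_stack(0);
 *	}
 */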
2984
2985#ifdef CONFIG_USER_STACKTRACE_SUPPORT
2986static DEFINE_PER_CPU(int, user_stack_count);
2987
2988static void
2989ftrace_trace_userstack(struct trace_array *tr,
2990 struct ring_buffer *buffer, unsigned long flags, int pc)
2991{
2992 struct trace_event_call *call = &event_user_stack;
2993 struct ring_buffer_event *event;
2994 struct userstack_entry *entry;
2995
2996 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
2997 return;
2998
2999 /*
3000	 * NMIs cannot handle page faults, even with fixups.
3001	 * Saving the user stack can (and often does) fault.
3002 */
3003 if (unlikely(in_nmi()))
3004 return;
3005
3006 /*
3007 * prevent recursion, since the user stack tracing may
3008 * trigger other kernel events.
3009 */
3010 preempt_disable();
3011 if (__this_cpu_read(user_stack_count))
3012 goto out;
3013
3014 __this_cpu_inc(user_stack_count);
3015
3016 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3017 sizeof(*entry), flags, pc);
3018 if (!event)
3019 goto out_drop_count;
3020 entry = ring_buffer_event_data(event);
3021
3022 entry->tgid = current->tgid;
3023 memset(&entry->caller, 0, sizeof(entry->caller));
3024
3025 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3026 if (!call_filter_check_discard(call, entry, buffer, event))
3027 __buffer_unlock_commit(buffer, event);
3028
3029 out_drop_count:
3030 __this_cpu_dec(user_stack_count);
3031 out:
3032 preempt_enable();
3033}
3034#else /* CONFIG_USER_STACKTRACE_SUPPORT */
3035static void ftrace_trace_userstack(struct trace_array *tr,
3036 struct ring_buffer *buffer,
3037 unsigned long flags, int pc)
3038{
3039}
3040#endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3041
3042#endif /* CONFIG_STACKTRACE */
3043
3044/* created for use with alloc_percpu */
3045struct trace_buffer_struct {
3046 int nesting;
3047 char buffer[4][TRACE_BUF_SIZE];
3048};
3049
3050static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3051
3052/*
3053 * This allows for lockless recording. If we're nested too deeply, then
3054 * this returns NULL.
3055 */
3056static char *get_trace_buf(void)
3057{
3058 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3059
3060 if (!trace_percpu_buffer || buffer->nesting >= 4)
3061 return NULL;
3062
3063 buffer->nesting++;
3064
3065 /* Interrupts must see nesting incremented before we use the buffer */
3066 barrier();
3067 return &buffer->buffer[buffer->nesting - 1][0];
3068}
3069
3070static void put_trace_buf(void)
3071{
3072 /* Don't let the decrement of nesting leak before this */
3073 barrier();
3074 this_cpu_dec(trace_percpu_buffer->nesting);
3075}
3076
3077static int alloc_percpu_trace_buffer(void)
3078{
3079 struct trace_buffer_struct __percpu *buffers;
3080
3081 if (trace_percpu_buffer)
3082 return 0;
3083
3084 buffers = alloc_percpu(struct trace_buffer_struct);
3085 if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3086 return -ENOMEM;
3087
3088 trace_percpu_buffer = buffers;
3089 return 0;
3090}
3091
3092static int buffers_allocated;
3093
3094void trace_printk_init_buffers(void)
3095{
3096 if (buffers_allocated)
3097 return;
3098
3099 if (alloc_percpu_trace_buffer())
3100 return;
3101
3102 /* trace_printk() is for debug use only. Don't use it in production. */
3103
3104 pr_warn("\n");
3105 pr_warn("**********************************************************\n");
3106 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3107 pr_warn("** **\n");
3108 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3109 pr_warn("** **\n");
3110 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3111 pr_warn("** unsafe for production use. **\n");
3112 pr_warn("** **\n");
3113 pr_warn("** If you see this message and you are not debugging **\n");
3114 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3115 pr_warn("** **\n");
3116 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3117 pr_warn("**********************************************************\n");
3118
3119 /* Expand the buffers to set size */
3120 tracing_update_buffers();
3121
3122 buffers_allocated = 1;
3123
3124 /*
3125 * trace_printk_init_buffers() can be called by modules.
3126 * If that happens, then we need to start cmdline recording
3127 * directly here. If the global_trace.buffer is already
3128 * allocated here, then this was called by module code.
3129 */
3130 if (global_trace.trace_buffer.buffer)
3131 tracing_start_cmdline_record();
3132}
3133EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3134
3135void trace_printk_start_comm(void)
3136{
3137 /* Start tracing comms if trace printk is set */
3138 if (!buffers_allocated)
3139 return;
3140 tracing_start_cmdline_record();
3141}
3142
3143static void trace_printk_start_stop_comm(int enabled)
3144{
3145 if (!buffers_allocated)
3146 return;
3147
3148 if (enabled)
3149 tracing_start_cmdline_record();
3150 else
3151 tracing_stop_cmdline_record();
3152}
3153
3154/**
3155 * trace_vbprintk - write binary msg to tracing buffer
3156 * @ip: The address of the caller
3157 * @fmt: The string format to write to the buffer
3158 * @args: Arguments for @fmt
3159 */
3160int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3161{
3162 struct trace_event_call *call = &event_bprint;
3163 struct ring_buffer_event *event;
3164 struct ring_buffer *buffer;
3165 struct trace_array *tr = &global_trace;
3166 struct bprint_entry *entry;
3167 unsigned long flags;
3168 char *tbuffer;
3169 int len = 0, size, pc;
3170
3171 if (unlikely(tracing_selftest_running || tracing_disabled))
3172 return 0;
3173
3174 /* Don't pollute graph traces with trace_vprintk internals */
3175 pause_graph_tracing();
3176
3177 pc = preempt_count();
3178 preempt_disable_notrace();
3179
3180 tbuffer = get_trace_buf();
3181 if (!tbuffer) {
3182 len = 0;
3183 goto out_nobuffer;
3184 }
3185
3186 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3187
3188 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3189 goto out;
3190
3191 local_save_flags(flags);
3192 size = sizeof(*entry) + sizeof(u32) * len;
3193 buffer = tr->trace_buffer.buffer;
3194 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3195 flags, pc);
3196 if (!event)
3197 goto out;
3198 entry = ring_buffer_event_data(event);
3199 entry->ip = ip;
3200 entry->fmt = fmt;
3201
3202 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3203 if (!call_filter_check_discard(call, entry, buffer, event)) {
3204 __buffer_unlock_commit(buffer, event);
3205 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3206 }
3207
3208out:
3209 put_trace_buf();
3210
3211out_nobuffer:
3212 preempt_enable_notrace();
3213 unpause_graph_tracing();
3214
3215 return len;
3216}
3217EXPORT_SYMBOL_GPL(trace_vbprintk);
3218
3219__printf(3, 0)
3220static int
3221__trace_array_vprintk(struct ring_buffer *buffer,
3222 unsigned long ip, const char *fmt, va_list args)
3223{
3224 struct trace_event_call *call = &event_print;
3225 struct ring_buffer_event *event;
3226 int len = 0, size, pc;
3227 struct print_entry *entry;
3228 unsigned long flags;
3229 char *tbuffer;
3230
3231 if (tracing_disabled || tracing_selftest_running)
3232 return 0;
3233
3234 /* Don't pollute graph traces with trace_vprintk internals */
3235 pause_graph_tracing();
3236
3237 pc = preempt_count();
3238 preempt_disable_notrace();
3239
3240
3241 tbuffer = get_trace_buf();
3242 if (!tbuffer) {
3243 len = 0;
3244 goto out_nobuffer;
3245 }
3246
3247 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3248
3249 local_save_flags(flags);
3250 size = sizeof(*entry) + len + 1;
3251 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3252 flags, pc);
3253 if (!event)
3254 goto out;
3255 entry = ring_buffer_event_data(event);
3256 entry->ip = ip;
3257
3258 memcpy(&entry->buf, tbuffer, len + 1);
3259 if (!call_filter_check_discard(call, entry, buffer, event)) {
3260 __buffer_unlock_commit(buffer, event);
3261 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3262 }
3263
3264out:
3265 put_trace_buf();
3266
3267out_nobuffer:
3268 preempt_enable_notrace();
3269 unpause_graph_tracing();
3270
3271 return len;
3272}
3273
3274__printf(3, 0)
3275int trace_array_vprintk(struct trace_array *tr,
3276 unsigned long ip, const char *fmt, va_list args)
3277{
3278 return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3279}
3280
3281/**
3282 * trace_array_printk - Print a message to a specific instance
3283 * @tr: The instance trace_array descriptor
3284 * @ip: The instruction pointer that this is called from.
3285 * @fmt: The format to print (printf format)
3286 *
3287 * If a subsystem sets up its own instance, they have the right to
3288 * printk strings into their tracing instance buffer using this
3289 * function. Note, this function will not write into the top level
3290 * buffer (use trace_printk() for that), as writing into the top level
3291 * buffer should only have events that can be individually disabled.
3292 * trace_printk() is only used for debugging a kernel, and should never
3293 * be incorporated into normal use.
3294 *
3295 * trace_array_printk() can be used, as it will not add noise to the
3296 * top level tracing buffer.
3297 *
3298 * Note, trace_array_init_printk() must be called on @tr before this
3299 * can be used.
3300 */
3301__printf(3, 0)
3302int trace_array_printk(struct trace_array *tr,
3303 unsigned long ip, const char *fmt, ...)
3304{
3305 int ret;
3306 va_list ap;
3307
3308 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3309 return 0;
3310
3311 if (!tr)
3312 return -ENOENT;
3313
3314 va_start(ap, fmt);
3315 ret = trace_array_vprintk(tr, ip, fmt, ap);
3316 va_end(ap);
3317 return ret;
3318}
3319EXPORT_SYMBOL_GPL(trace_array_printk);
3320
3321/**
3322 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3323 * @tr: The trace array to initialize the buffers for
3324 *
3325 * As trace_array_printk() only writes into instances, they are OK to
3326 * have in the kernel (unlike trace_printk()). This needs to be called
3327 * before trace_array_printk() can be used on a trace_array.
3328 */
3329int trace_array_init_printk(struct trace_array *tr)
3330{
3331 if (!tr)
3332 return -ENOENT;
3333
3334 /* This is only allowed for created instances */
3335 if (tr == &global_trace)
3336 return -EINVAL;
3337
3338 return alloc_percpu_trace_buffer();
3339}
3340EXPORT_SYMBOL_GPL(trace_array_init_printk);
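
/*
 * Added sketch (not part of the original file): how a subsystem that owns a
 * trace instance would combine the two calls above. "my_tr" is a hypothetical
 * pointer to an instance created elsewhere by that subsystem.
 */
static struct trace_array *my_tr;

static int my_subsystem_trace_init(void)
{
	int ret;

	/* Allocate the per-CPU printk buffers once, before any printing */
	ret = trace_array_init_printk(my_tr);
	if (ret)
		return ret;

	/* Writes only into my_tr's buffer, never the top-level buffer */
	trace_array_printk(my_tr, _THIS_IP_, "subsystem initialized\n");
	return 0;
}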
3341
3342__printf(3, 4)
3343int trace_array_printk_buf(struct ring_buffer *buffer,
3344 unsigned long ip, const char *fmt, ...)
3345{
3346 int ret;
3347 va_list ap;
3348
3349 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3350 return 0;
3351
3352 va_start(ap, fmt);
3353 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3354 va_end(ap);
3355 return ret;
3356}
3357
3358__printf(2, 0)
3359int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3360{
3361 return trace_array_vprintk(&global_trace, ip, fmt, args);
3362}
3363EXPORT_SYMBOL_GPL(trace_vprintk);
3364
3365static void trace_iterator_increment(struct trace_iterator *iter)
3366{
3367 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3368
3369 iter->idx++;
3370 if (buf_iter)
3371 ring_buffer_iter_advance(buf_iter);
3372}
3373
3374static struct trace_entry *
3375peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3376 unsigned long *lost_events)
3377{
3378 struct ring_buffer_event *event;
3379 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3380
3381 if (buf_iter)
3382 event = ring_buffer_iter_peek(buf_iter, ts);
3383 else
3384 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3385 lost_events);
3386
3387 if (event) {
3388 iter->ent_size = ring_buffer_event_length(event);
3389 return ring_buffer_event_data(event);
3390 }
3391 iter->ent_size = 0;
3392 return NULL;
3393}
3394
3395static struct trace_entry *
3396__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3397 unsigned long *missing_events, u64 *ent_ts)
3398{
3399 struct ring_buffer *buffer = iter->trace_buffer->buffer;
3400 struct trace_entry *ent, *next = NULL;
3401 unsigned long lost_events = 0, next_lost = 0;
3402 int cpu_file = iter->cpu_file;
3403 u64 next_ts = 0, ts;
3404 int next_cpu = -1;
3405 int next_size = 0;
3406 int cpu;
3407
3408 /*
3409	 * If we are in a per_cpu trace file, don't bother iterating over
3410	 * all CPUs; peek at that CPU directly.
3411 */
3412 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3413 if (ring_buffer_empty_cpu(buffer, cpu_file))
3414 return NULL;
3415 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3416 if (ent_cpu)
3417 *ent_cpu = cpu_file;
3418
3419 return ent;
3420 }
3421
3422 for_each_tracing_cpu(cpu) {
3423
3424 if (ring_buffer_empty_cpu(buffer, cpu))
3425 continue;
3426
3427 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3428
3429 /*
3430 * Pick the entry with the smallest timestamp:
3431 */
3432 if (ent && (!next || ts < next_ts)) {
3433 next = ent;
3434 next_cpu = cpu;
3435 next_ts = ts;
3436 next_lost = lost_events;
3437 next_size = iter->ent_size;
3438 }
3439 }
3440
3441 iter->ent_size = next_size;
3442
3443 if (ent_cpu)
3444 *ent_cpu = next_cpu;
3445
3446 if (ent_ts)
3447 *ent_ts = next_ts;
3448
3449 if (missing_events)
3450 *missing_events = next_lost;
3451
3452 return next;
3453}
3454
3455/* Find the next real entry, without updating the iterator itself */
3456struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3457 int *ent_cpu, u64 *ent_ts)
3458{
3459 return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3460}
3461
3462/* Find the next real entry, and increment the iterator to the next entry */
3463void *trace_find_next_entry_inc(struct trace_iterator *iter)
3464{
3465 iter->ent = __find_next_entry(iter, &iter->cpu,
3466 &iter->lost_events, &iter->ts);
3467
3468 if (iter->ent)
3469 trace_iterator_increment(iter);
3470
3471 return iter->ent ? iter : NULL;
3472}
3473
3474static void trace_consume(struct trace_iterator *iter)
3475{
3476 ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3477 &iter->lost_events);
3478}
3479
3480static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3481{
3482 struct trace_iterator *iter = m->private;
3483 int i = (int)*pos;
3484 void *ent;
3485
3486 WARN_ON_ONCE(iter->leftover);
3487
3488 (*pos)++;
3489
3490 /* can't go backwards */
3491 if (iter->idx > i)
3492 return NULL;
3493
3494 if (iter->idx < 0)
3495 ent = trace_find_next_entry_inc(iter);
3496 else
3497 ent = iter;
3498
3499 while (ent && iter->idx < i)
3500 ent = trace_find_next_entry_inc(iter);
3501
3502 iter->pos = *pos;
3503
3504 return ent;
3505}
3506
3507void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3508{
3509 struct ring_buffer_event *event;
3510 struct ring_buffer_iter *buf_iter;
3511 unsigned long entries = 0;
3512 u64 ts;
3513
3514 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3515
3516 buf_iter = trace_buffer_iter(iter, cpu);
3517 if (!buf_iter)
3518 return;
3519
3520 ring_buffer_iter_reset(buf_iter);
3521
3522 /*
3523	 * With the max latency tracers, it is possible that a reset
3524	 * never took place on a CPU. This shows up as timestamps that
3525	 * are before the start of the buffer.
3526 */
3527 while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3528 if (ts >= iter->trace_buffer->time_start)
3529 break;
3530 entries++;
3531 ring_buffer_iter_advance(buf_iter);
3532 /* This could be a big loop */
3533 cond_resched();
3534 }
3535
3536 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3537}
3538
3539/*
3540 * The current tracer is copied to avoid global locking
3541 * all around.
3542 */
3543static void *s_start(struct seq_file *m, loff_t *pos)
3544{
3545 struct trace_iterator *iter = m->private;
3546 struct trace_array *tr = iter->tr;
3547 int cpu_file = iter->cpu_file;
3548 void *p = NULL;
3549 loff_t l = 0;
3550 int cpu;
3551
3552 /*
3553 * copy the tracer to avoid using a global lock all around.
3554 * iter->trace is a copy of current_trace, the pointer to the
3555 * name may be used instead of a strcmp(), as iter->trace->name
3556 * will point to the same string as current_trace->name.
3557 */
3558 mutex_lock(&trace_types_lock);
3559 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
3560 /* Close iter->trace before switching to the new current tracer */
3561 if (iter->trace->close)
3562 iter->trace->close(iter);
3563 *iter->trace = *tr->current_trace;
3564 /* Reopen the new current tracer */
3565 if (iter->trace->open)
3566 iter->trace->open(iter);
3567 }
3568 mutex_unlock(&trace_types_lock);
3569
3570#ifdef CONFIG_TRACER_MAX_TRACE
3571 if (iter->snapshot && iter->trace->use_max_tr)
3572 return ERR_PTR(-EBUSY);
3573#endif
3574
3575 if (*pos != iter->pos) {
3576 iter->ent = NULL;
3577 iter->cpu = 0;
3578 iter->idx = -1;
3579
3580 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3581 for_each_tracing_cpu(cpu)
3582 tracing_iter_reset(iter, cpu);
3583 } else
3584 tracing_iter_reset(iter, cpu_file);
3585
3586 iter->leftover = 0;
3587 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3588 ;
3589
3590 } else {
3591 /*
3592 * If we overflowed the seq_file before, then we want
3593 * to just reuse the trace_seq buffer again.
3594 */
3595 if (iter->leftover)
3596 p = iter;
3597 else {
3598 l = *pos - 1;
3599 p = s_next(m, p, &l);
3600 }
3601 }
3602
3603 trace_event_read_lock();
3604 trace_access_lock(cpu_file);
3605 return p;
3606}
3607
3608static void s_stop(struct seq_file *m, void *p)
3609{
3610 struct trace_iterator *iter = m->private;
3611
3612#ifdef CONFIG_TRACER_MAX_TRACE
3613 if (iter->snapshot && iter->trace->use_max_tr)
3614 return;
3615#endif
3616
3617 trace_access_unlock(iter->cpu_file);
3618 trace_event_read_unlock();
3619}
3620
3621static void
3622get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total,
3623 unsigned long *entries, int cpu)
3624{
3625 unsigned long count;
3626
3627 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3628 /*
3629 * If this buffer has skipped entries, then we hold all
3630 * entries for the trace and we need to ignore the
3631 * ones before the time stamp.
3632 */
3633 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3634 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3635 /* total is the same as the entries */
3636 *total = count;
3637 } else
3638 *total = count +
3639 ring_buffer_overrun_cpu(buf->buffer, cpu);
3640 *entries = count;
3641}
3642
3643static void
3644get_total_entries(struct trace_buffer *buf,
3645 unsigned long *total, unsigned long *entries)
3646{
3647 unsigned long t, e;
3648 int cpu;
3649
3650 *total = 0;
3651 *entries = 0;
3652
3653 for_each_tracing_cpu(cpu) {
3654 get_total_entries_cpu(buf, &t, &e, cpu);
3655 *total += t;
3656 *entries += e;
3657 }
3658}
3659
3660unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3661{
3662 unsigned long total, entries;
3663
3664 if (!tr)
3665 tr = &global_trace;
3666
3667 get_total_entries_cpu(&tr->trace_buffer, &total, &entries, cpu);
3668
3669 return entries;
3670}
3671
3672unsigned long trace_total_entries(struct trace_array *tr)
3673{
3674 unsigned long total, entries;
3675
3676 if (!tr)
3677 tr = &global_trace;
3678
3679 get_total_entries(&tr->trace_buffer, &total, &entries);
3680
3681 return entries;
3682}
3683
3684static void print_lat_help_header(struct seq_file *m)
3685{
3686 seq_puts(m, "# _------=> CPU# \n"
3687 "# / _-----=> irqs-off \n"
3688 "# | / _----=> need-resched \n"
3689 "# || / _---=> hardirq/softirq \n"
3690 "# ||| / _--=> preempt-depth \n"
3691 "# |||| / delay \n"
3692 "# cmd pid ||||| time | caller \n"
3693 "# \\ / ||||| \\ | / \n");
3694}
3695
3696static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3697{
3698 unsigned long total;
3699 unsigned long entries;
3700
3701 get_total_entries(buf, &total, &entries);
3702 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
3703 entries, total, num_online_cpus());
3704 seq_puts(m, "#\n");
3705}
3706
3707static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3708 unsigned int flags)
3709{
3710 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3711
3712 print_event_info(buf, m);
3713
3714 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
3715 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
3716}
3717
3718static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3719 unsigned int flags)
3720{
3721 bool tgid = flags & TRACE_ITER_RECORD_TGID;
3722 const char *space = " ";
3723 int prec = tgid ? 12 : 2;
3724
3725 print_event_info(buf, m);
3726
3727 seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
3728 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
3729 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
3730 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
3731 seq_printf(m, "# %.*s||| / delay\n", prec, space);
3732 seq_printf(m, "# TASK-PID %.*s CPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID ");
3733 seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | ");
3734}
3735
3736void
3737print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3738{
3739 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3740 struct trace_buffer *buf = iter->trace_buffer;
3741 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3742 struct tracer *type = iter->trace;
3743 unsigned long entries;
3744 unsigned long total;
3745 const char *name = "preemption";
3746
3747 name = type->name;
3748
3749 get_total_entries(buf, &total, &entries);
3750
3751 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3752 name, UTS_RELEASE);
3753 seq_puts(m, "# -----------------------------------"
3754 "---------------------------------\n");
3755 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3756 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3757 nsecs_to_usecs(data->saved_latency),
3758 entries,
3759 total,
3760 buf->cpu,
3761#if defined(CONFIG_PREEMPT_NONE)
3762 "server",
3763#elif defined(CONFIG_PREEMPT_VOLUNTARY)
3764 "desktop",
3765#elif defined(CONFIG_PREEMPT)
3766 "preempt",
3767#else
3768 "unknown",
3769#endif
3770 /* These are reserved for later use */
3771 0, 0, 0, 0);
3772#ifdef CONFIG_SMP
3773 seq_printf(m, " #P:%d)\n", num_online_cpus());
3774#else
3775 seq_puts(m, ")\n");
3776#endif
3777 seq_puts(m, "# -----------------\n");
3778 seq_printf(m, "# | task: %.16s-%d "
3779 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3780 data->comm, data->pid,
3781 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3782 data->policy, data->rt_priority);
3783 seq_puts(m, "# -----------------\n");
3784
3785 if (data->critical_start) {
3786 seq_puts(m, "# => started at: ");
3787 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3788 trace_print_seq(m, &iter->seq);
3789 seq_puts(m, "\n# => ended at: ");
3790 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3791 trace_print_seq(m, &iter->seq);
3792 seq_puts(m, "\n#\n");
3793 }
3794
3795 seq_puts(m, "#\n");
3796}
3797
3798static void test_cpu_buff_start(struct trace_iterator *iter)
3799{
3800 struct trace_seq *s = &iter->seq;
3801 struct trace_array *tr = iter->tr;
3802
3803 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3804 return;
3805
3806 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3807 return;
3808
3809 if (cpumask_available(iter->started) &&
3810 cpumask_test_cpu(iter->cpu, iter->started))
3811 return;
3812
3813 if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3814 return;
3815
3816 if (cpumask_available(iter->started))
3817 cpumask_set_cpu(iter->cpu, iter->started);
3818
3819 /* Don't print started cpu buffer for the first entry of the trace */
3820 if (iter->idx > 1)
3821 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3822 iter->cpu);
3823}
3824
3825static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3826{
3827 struct trace_array *tr = iter->tr;
3828 struct trace_seq *s = &iter->seq;
3829 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3830 struct trace_entry *entry;
3831 struct trace_event *event;
3832
3833 entry = iter->ent;
3834
3835 test_cpu_buff_start(iter);
3836
3837 event = ftrace_find_event(entry->type);
3838
3839 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3840 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3841 trace_print_lat_context(iter);
3842 else
3843 trace_print_context(iter);
3844 }
3845
3846 if (trace_seq_has_overflowed(s))
3847 return TRACE_TYPE_PARTIAL_LINE;
3848
3849 if (event)
3850 return event->funcs->trace(iter, sym_flags, event);
3851
3852 trace_seq_printf(s, "Unknown type %d\n", entry->type);
3853
3854 return trace_handle_return(s);
3855}
3856
3857static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3858{
3859 struct trace_array *tr = iter->tr;
3860 struct trace_seq *s = &iter->seq;
3861 struct trace_entry *entry;
3862 struct trace_event *event;
3863
3864 entry = iter->ent;
3865
3866 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3867 trace_seq_printf(s, "%d %d %llu ",
3868 entry->pid, iter->cpu, iter->ts);
3869
3870 if (trace_seq_has_overflowed(s))
3871 return TRACE_TYPE_PARTIAL_LINE;
3872
3873 event = ftrace_find_event(entry->type);
3874 if (event)
3875 return event->funcs->raw(iter, 0, event);
3876
3877 trace_seq_printf(s, "%d ?\n", entry->type);
3878
3879 return trace_handle_return(s);
3880}
3881
3882static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3883{
3884 struct trace_array *tr = iter->tr;
3885 struct trace_seq *s = &iter->seq;
3886 unsigned char newline = '\n';
3887 struct trace_entry *entry;
3888 struct trace_event *event;
3889
3890 entry = iter->ent;
3891
3892 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3893 SEQ_PUT_HEX_FIELD(s, entry->pid);
3894 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3895 SEQ_PUT_HEX_FIELD(s, iter->ts);
3896 if (trace_seq_has_overflowed(s))
3897 return TRACE_TYPE_PARTIAL_LINE;
3898 }
3899
3900 event = ftrace_find_event(entry->type);
3901 if (event) {
3902 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3903 if (ret != TRACE_TYPE_HANDLED)
3904 return ret;
3905 }
3906
3907 SEQ_PUT_FIELD(s, newline);
3908
3909 return trace_handle_return(s);
3910}
3911
3912static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3913{
3914 struct trace_array *tr = iter->tr;
3915 struct trace_seq *s = &iter->seq;
3916 struct trace_entry *entry;
3917 struct trace_event *event;
3918
3919 entry = iter->ent;
3920
3921 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3922 SEQ_PUT_FIELD(s, entry->pid);
3923 SEQ_PUT_FIELD(s, iter->cpu);
3924 SEQ_PUT_FIELD(s, iter->ts);
3925 if (trace_seq_has_overflowed(s))
3926 return TRACE_TYPE_PARTIAL_LINE;
3927 }
3928
3929 event = ftrace_find_event(entry->type);
3930 return event ? event->funcs->binary(iter, 0, event) :
3931 TRACE_TYPE_HANDLED;
3932}
3933
3934int trace_empty(struct trace_iterator *iter)
3935{
3936 struct ring_buffer_iter *buf_iter;
3937 int cpu;
3938
3939 /* If we are looking at one CPU buffer, only check that one */
3940 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3941 cpu = iter->cpu_file;
3942 buf_iter = trace_buffer_iter(iter, cpu);
3943 if (buf_iter) {
3944 if (!ring_buffer_iter_empty(buf_iter))
3945 return 0;
3946 } else {
3947 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3948 return 0;
3949 }
3950 return 1;
3951 }
3952
3953 for_each_tracing_cpu(cpu) {
3954 buf_iter = trace_buffer_iter(iter, cpu);
3955 if (buf_iter) {
3956 if (!ring_buffer_iter_empty(buf_iter))
3957 return 0;
3958 } else {
3959 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3960 return 0;
3961 }
3962 }
3963
3964 return 1;
3965}
3966
3967/* Called with trace_event_read_lock() held. */
3968enum print_line_t print_trace_line(struct trace_iterator *iter)
3969{
3970 struct trace_array *tr = iter->tr;
3971 unsigned long trace_flags = tr->trace_flags;
3972 enum print_line_t ret;
3973
3974 if (iter->lost_events) {
3975 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3976 iter->cpu, iter->lost_events);
3977 if (trace_seq_has_overflowed(&iter->seq))
3978 return TRACE_TYPE_PARTIAL_LINE;
3979 }
3980
3981 if (iter->trace && iter->trace->print_line) {
3982 ret = iter->trace->print_line(iter);
3983 if (ret != TRACE_TYPE_UNHANDLED)
3984 return ret;
3985 }
3986
3987 if (iter->ent->type == TRACE_BPUTS &&
3988 trace_flags & TRACE_ITER_PRINTK &&
3989 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3990 return trace_print_bputs_msg_only(iter);
3991
3992 if (iter->ent->type == TRACE_BPRINT &&
3993 trace_flags & TRACE_ITER_PRINTK &&
3994 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3995 return trace_print_bprintk_msg_only(iter);
3996
3997 if (iter->ent->type == TRACE_PRINT &&
3998 trace_flags & TRACE_ITER_PRINTK &&
3999 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4000 return trace_print_printk_msg_only(iter);
4001
4002 if (trace_flags & TRACE_ITER_BIN)
4003 return print_bin_fmt(iter);
4004
4005 if (trace_flags & TRACE_ITER_HEX)
4006 return print_hex_fmt(iter);
4007
4008 if (trace_flags & TRACE_ITER_RAW)
4009 return print_raw_fmt(iter);
4010
4011 return print_trace_fmt(iter);
4012}
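
/*
 * Quick summary of the dispatch above (descriptive only): print_trace_line()
 * first reports lost events, then lets the current tracer's ->print_line()
 * handle the entry, then handles the printk msg-only cases, and finally the
 * "bin", "hex" and "raw" trace_options select print_bin_fmt(),
 * print_hex_fmt() or print_raw_fmt() before falling back to
 * print_trace_fmt(). For example (assuming tracefs is mounted at
 * /sys/kernel/tracing):  echo hex > /sys/kernel/tracing/trace_options
 */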
4013
4014void trace_latency_header(struct seq_file *m)
4015{
4016 struct trace_iterator *iter = m->private;
4017 struct trace_array *tr = iter->tr;
4018
4019 /* print nothing if the buffers are empty */
4020 if (trace_empty(iter))
4021 return;
4022
4023 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4024 print_trace_header(m, iter);
4025
4026 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4027 print_lat_help_header(m);
4028}
4029
4030void trace_default_header(struct seq_file *m)
4031{
4032 struct trace_iterator *iter = m->private;
4033 struct trace_array *tr = iter->tr;
4034 unsigned long trace_flags = tr->trace_flags;
4035
4036 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4037 return;
4038
4039 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4040 /* print nothing if the buffers are empty */
4041 if (trace_empty(iter))
4042 return;
4043 print_trace_header(m, iter);
4044 if (!(trace_flags & TRACE_ITER_VERBOSE))
4045 print_lat_help_header(m);
4046 } else {
4047 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4048 if (trace_flags & TRACE_ITER_IRQ_INFO)
4049 print_func_help_header_irq(iter->trace_buffer,
4050 m, trace_flags);
4051 else
4052 print_func_help_header(iter->trace_buffer, m,
4053 trace_flags);
4054 }
4055 }
4056}
4057
4058static void test_ftrace_alive(struct seq_file *m)
4059{
4060 if (!ftrace_is_dead())
4061 return;
4062 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4063 "# MAY BE MISSING FUNCTION EVENTS\n");
4064}
4065
4066#ifdef CONFIG_TRACER_MAX_TRACE
4067static void show_snapshot_main_help(struct seq_file *m)
4068{
4069 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4070 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4071 "# Takes a snapshot of the main buffer.\n"
4072 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4073 "# (Doesn't have to be '2' works with any number that\n"
4074 "# is not a '0' or '1')\n");
4075}
4076
4077static void show_snapshot_percpu_help(struct seq_file *m)
4078{
4079 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4080#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4081 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4082 "# Takes a snapshot of the main buffer for this cpu.\n");
4083#else
4084 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4085 "# Must use main snapshot file to allocate.\n");
4086#endif
4087 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4088 "# (Doesn't have to be '2' works with any number that\n"
4089 "# is not a '0' or '1')\n");
4090}
4091
4092static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4093{
4094 if (iter->tr->allocated_snapshot)
4095 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4096 else
4097 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4098
4099 seq_puts(m, "# Snapshot commands:\n");
4100 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4101 show_snapshot_main_help(m);
4102 else
4103 show_snapshot_percpu_help(m);
4104}
4105#else
4106/* Should never be called */
4107static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4108#endif
4109
4110static int s_show(struct seq_file *m, void *v)
4111{
4112 struct trace_iterator *iter = v;
4113 int ret;
4114
4115 if (iter->ent == NULL) {
4116 if (iter->tr) {
4117 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4118 seq_puts(m, "#\n");
4119 test_ftrace_alive(m);
4120 }
4121 if (iter->snapshot && trace_empty(iter))
4122 print_snapshot_help(m, iter);
4123 else if (iter->trace && iter->trace->print_header)
4124 iter->trace->print_header(m);
4125 else
4126 trace_default_header(m);
4127
4128 } else if (iter->leftover) {
4129 /*
4130 * If we filled the seq_file buffer earlier, we
4131 * want to just show it now.
4132 */
4133 ret = trace_print_seq(m, &iter->seq);
4134
4135 /* ret should this time be zero, but you never know */
4136 iter->leftover = ret;
4137
4138 } else {
4139 ret = print_trace_line(iter);
4140 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4141 iter->seq.full = 0;
4142 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4143 }
4144 ret = trace_print_seq(m, &iter->seq);
4145 /*
4146 * If we overflow the seq_file buffer, then it will
4147 * ask us for this data again at start up.
4148 * Use that instead.
4149 * ret is 0 if seq_file write succeeded.
4150 * -1 otherwise.
4151 */
4152 iter->leftover = ret;
4153 }
4154
4155 return 0;
4156}
4157
4158/*
4159 * Should be used after trace_array_get(), trace_types_lock
4160 * ensures that i_cdev was already initialized.
4161 */
4162static inline int tracing_get_cpu(struct inode *inode)
4163{
4164 if (inode->i_cdev) /* See trace_create_cpu_file() */
4165 return (long)inode->i_cdev - 1;
4166 return RING_BUFFER_ALL_CPUS;
4167}
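
/*
 * Sketch of the encoding tracing_get_cpu() relies on (see
 * trace_create_cpu_file()): the per_cpu/cpuN files are expected to store
 * (cpu + 1) in i_cdev, so a NULL i_cdev (the top level files) maps to
 * RING_BUFFER_ALL_CPUS and a cpuN file maps back to N.
 */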
4168
4169static const struct seq_operations tracer_seq_ops = {
4170 .start = s_start,
4171 .next = s_next,
4172 .stop = s_stop,
4173 .show = s_show,
4174};
4175
4176static struct trace_iterator *
4177__tracing_open(struct inode *inode, struct file *file, bool snapshot)
4178{
4179 struct trace_array *tr = inode->i_private;
4180 struct trace_iterator *iter;
4181 int cpu;
4182
4183 if (tracing_disabled)
4184 return ERR_PTR(-ENODEV);
4185
4186 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4187 if (!iter)
4188 return ERR_PTR(-ENOMEM);
4189
4190 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4191 GFP_KERNEL);
4192 if (!iter->buffer_iter)
4193 goto release;
4194
4195 /*
4196 * We make a copy of the current tracer to avoid concurrent
4197 * changes on it while we are reading.
4198 */
4199 mutex_lock(&trace_types_lock);
4200 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4201 if (!iter->trace)
4202 goto fail;
4203
4204 *iter->trace = *tr->current_trace;
4205
4206 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4207 goto fail;
4208
4209 iter->tr = tr;
4210
4211#ifdef CONFIG_TRACER_MAX_TRACE
4212 /* Currently only the top directory has a snapshot */
4213 if (tr->current_trace->print_max || snapshot)
4214 iter->trace_buffer = &tr->max_buffer;
4215 else
4216#endif
4217 iter->trace_buffer = &tr->trace_buffer;
4218 iter->snapshot = snapshot;
4219 iter->pos = -1;
4220 iter->cpu_file = tracing_get_cpu(inode);
4221 mutex_init(&iter->mutex);
4222
4223 /* Notify the tracer early; before we stop tracing. */
4224 if (iter->trace && iter->trace->open)
4225 iter->trace->open(iter);
4226
4227 /* Annotate start of buffers if we had overruns */
4228 if (ring_buffer_overruns(iter->trace_buffer->buffer))
4229 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4230
4231 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4232 if (trace_clocks[tr->clock_id].in_ns)
4233 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4234
4235 /* stop the trace while dumping if we are not opening "snapshot" */
4236 if (!iter->snapshot)
4237 tracing_stop_tr(tr);
4238
4239 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4240 for_each_tracing_cpu(cpu) {
4241 iter->buffer_iter[cpu] =
4242 ring_buffer_read_prepare(iter->trace_buffer->buffer,
4243 cpu, GFP_KERNEL);
4244 }
4245 ring_buffer_read_prepare_sync();
4246 for_each_tracing_cpu(cpu) {
4247 ring_buffer_read_start(iter->buffer_iter[cpu]);
4248 tracing_iter_reset(iter, cpu);
4249 }
4250 } else {
4251 cpu = iter->cpu_file;
4252 iter->buffer_iter[cpu] =
4253 ring_buffer_read_prepare(iter->trace_buffer->buffer,
4254 cpu, GFP_KERNEL);
4255 ring_buffer_read_prepare_sync();
4256 ring_buffer_read_start(iter->buffer_iter[cpu]);
4257 tracing_iter_reset(iter, cpu);
4258 }
4259
4260 mutex_unlock(&trace_types_lock);
4261
4262 return iter;
4263
4264 fail:
4265 mutex_unlock(&trace_types_lock);
4266 kfree(iter->trace);
4267 kfree(iter->buffer_iter);
4268release:
4269 seq_release_private(inode, file);
4270 return ERR_PTR(-ENOMEM);
4271}
4272
4273int tracing_open_generic(struct inode *inode, struct file *filp)
4274{
4275 int ret;
4276
4277 ret = tracing_check_open_get_tr(NULL);
4278 if (ret)
4279 return ret;
4280
4281 filp->private_data = inode->i_private;
4282 return 0;
4283}
4284
4285bool tracing_is_disabled(void)
4286{
4287 return (tracing_disabled) ? true : false;
4288}
4289
4290/*
4291 * Open and update trace_array ref count.
4292 * Must have the current trace_array passed to it.
4293 */
4294int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4295{
4296 struct trace_array *tr = inode->i_private;
4297 int ret;
4298
4299 ret = tracing_check_open_get_tr(tr);
4300 if (ret)
4301 return ret;
4302
4303 filp->private_data = inode->i_private;
4304
4305 return 0;
4306}
4307
4308/*
4309 * The private pointer of the inode is the trace_event_file.
4310 * Update the tr ref count associated to it.
4311 */
4312int tracing_open_file_tr(struct inode *inode, struct file *filp)
4313{
4314 struct trace_event_file *file = inode->i_private;
4315 int ret;
4316
4317 ret = tracing_check_open_get_tr(file->tr);
4318 if (ret)
4319 return ret;
4320
4321 mutex_lock(&event_mutex);
4322
4323 /* Fail if the file is marked for removal */
4324 if (file->flags & EVENT_FILE_FL_FREED) {
4325 trace_array_put(file->tr);
4326 ret = -ENODEV;
4327 } else {
4328 event_file_get(file);
4329 }
4330
4331 mutex_unlock(&event_mutex);
4332 if (ret)
4333 return ret;
4334
4335 filp->private_data = inode->i_private;
4336
4337 return 0;
4338}
4339
4340int tracing_release_file_tr(struct inode *inode, struct file *filp)
4341{
4342 struct trace_event_file *file = inode->i_private;
4343
4344 trace_array_put(file->tr);
4345 event_file_put(file);
4346
4347 return 0;
4348}
4349
4350static int tracing_release(struct inode *inode, struct file *file)
4351{
4352 struct trace_array *tr = inode->i_private;
4353 struct seq_file *m = file->private_data;
4354 struct trace_iterator *iter;
4355 int cpu;
4356
4357 if (!(file->f_mode & FMODE_READ)) {
4358 trace_array_put(tr);
4359 return 0;
4360 }
4361
4362 /* Writes do not use seq_file */
4363 iter = m->private;
4364 mutex_lock(&trace_types_lock);
4365
4366 for_each_tracing_cpu(cpu) {
4367 if (iter->buffer_iter[cpu])
4368 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4369 }
4370
4371 if (iter->trace && iter->trace->close)
4372 iter->trace->close(iter);
4373
4374 if (!iter->snapshot)
4375 /* reenable tracing if it was previously enabled */
4376 tracing_start_tr(tr);
4377
4378 __trace_array_put(tr);
4379
4380 mutex_unlock(&trace_types_lock);
4381
4382 mutex_destroy(&iter->mutex);
4383 free_cpumask_var(iter->started);
4384 kfree(iter->trace);
4385 kfree(iter->buffer_iter);
4386 seq_release_private(inode, file);
4387
4388 return 0;
4389}
4390
4391static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4392{
4393 struct trace_array *tr = inode->i_private;
4394
4395 trace_array_put(tr);
4396 return 0;
4397}
4398
4399static int tracing_single_release_tr(struct inode *inode, struct file *file)
4400{
4401 struct trace_array *tr = inode->i_private;
4402
4403 trace_array_put(tr);
4404
4405 return single_release(inode, file);
4406}
4407
4408static int tracing_open(struct inode *inode, struct file *file)
4409{
4410 struct trace_array *tr = inode->i_private;
4411 struct trace_iterator *iter;
4412 int ret;
4413
4414 ret = tracing_check_open_get_tr(tr);
4415 if (ret)
4416 return ret;
4417
4418 /* If this file was open for write, then erase contents */
4419 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4420 int cpu = tracing_get_cpu(inode);
4421 struct trace_buffer *trace_buf = &tr->trace_buffer;
4422
4423#ifdef CONFIG_TRACER_MAX_TRACE
4424 if (tr->current_trace->print_max)
4425 trace_buf = &tr->max_buffer;
4426#endif
4427
4428 if (cpu == RING_BUFFER_ALL_CPUS)
4429 tracing_reset_online_cpus(trace_buf);
4430 else
4431 tracing_reset_cpu(trace_buf, cpu);
4432 }
4433
4434 if (file->f_mode & FMODE_READ) {
4435 iter = __tracing_open(inode, file, false);
4436 if (IS_ERR(iter))
4437 ret = PTR_ERR(iter);
4438 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4439 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4440 }
4441
4442 if (ret < 0)
4443 trace_array_put(tr);
4444
4445 return ret;
4446}
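
/*
 * Illustrative usage of the truncate path in tracing_open() above, assuming
 * tracefs is mounted at /sys/kernel/tracing:
 *
 *	echo > /sys/kernel/tracing/trace
 *
 * The shell opens "trace" for writing with O_TRUNC, which resets the
 * buffer(s) without any data being read.
 */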
4447
4448/*
4449 * Some tracers are not suitable for instance buffers.
4450 * A tracer is always available for the global array (toplevel)
4451 * or if it explicitly states that it is.
4452 */
4453static bool
4454trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4455{
4456 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4457}
4458
4459/* Find the next tracer that this trace array may use */
4460static struct tracer *
4461get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4462{
4463 while (t && !trace_ok_for_array(t, tr))
4464 t = t->next;
4465
4466 return t;
4467}
4468
4469static void *
4470t_next(struct seq_file *m, void *v, loff_t *pos)
4471{
4472 struct trace_array *tr = m->private;
4473 struct tracer *t = v;
4474
4475 (*pos)++;
4476
4477 if (t)
4478 t = get_tracer_for_array(tr, t->next);
4479
4480 return t;
4481}
4482
4483static void *t_start(struct seq_file *m, loff_t *pos)
4484{
4485 struct trace_array *tr = m->private;
4486 struct tracer *t;
4487 loff_t l = 0;
4488
4489 mutex_lock(&trace_types_lock);
4490
4491 t = get_tracer_for_array(tr, trace_types);
4492 for (; t && l < *pos; t = t_next(m, t, &l))
4493 ;
4494
4495 return t;
4496}
4497
4498static void t_stop(struct seq_file *m, void *p)
4499{
4500 mutex_unlock(&trace_types_lock);
4501}
4502
4503static int t_show(struct seq_file *m, void *v)
4504{
4505 struct tracer *t = v;
4506
4507 if (!t)
4508 return 0;
4509
4510 seq_puts(m, t->name);
4511 if (t->next)
4512 seq_putc(m, ' ');
4513 else
4514 seq_putc(m, '\n');
4515
4516 return 0;
4517}
4518
4519static const struct seq_operations show_traces_seq_ops = {
4520 .start = t_start,
4521 .next = t_next,
4522 .stop = t_stop,
4523 .show = t_show,
4524};
4525
4526static int show_traces_open(struct inode *inode, struct file *file)
4527{
4528 struct trace_array *tr = inode->i_private;
4529 struct seq_file *m;
4530 int ret;
4531
4532 ret = tracing_check_open_get_tr(tr);
4533 if (ret)
4534 return ret;
4535
4536 ret = seq_open(file, &show_traces_seq_ops);
4537 if (ret) {
4538 trace_array_put(tr);
4539 return ret;
4540 }
4541
4542 m = file->private_data;
4543 m->private = tr;
4544
4545 return 0;
4546}
4547
4548static int show_traces_release(struct inode *inode, struct file *file)
4549{
4550 struct trace_array *tr = inode->i_private;
4551
4552 trace_array_put(tr);
4553 return seq_release(inode, file);
4554}
4555
4556static ssize_t
4557tracing_write_stub(struct file *filp, const char __user *ubuf,
4558 size_t count, loff_t *ppos)
4559{
4560 return count;
4561}
4562
4563loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4564{
4565 int ret;
4566
4567 if (file->f_mode & FMODE_READ)
4568 ret = seq_lseek(file, offset, whence);
4569 else
4570 file->f_pos = ret = 0;
4571
4572 return ret;
4573}
4574
4575static const struct file_operations tracing_fops = {
4576 .open = tracing_open,
4577 .read = seq_read,
4578 .write = tracing_write_stub,
4579 .llseek = tracing_lseek,
4580 .release = tracing_release,
4581};
4582
4583static const struct file_operations show_traces_fops = {
4584 .open = show_traces_open,
4585 .read = seq_read,
4586 .llseek = seq_lseek,
4587 .release = show_traces_release,
4588};
4589
4590static ssize_t
4591tracing_cpumask_read(struct file *filp, char __user *ubuf,
4592 size_t count, loff_t *ppos)
4593{
4594 struct trace_array *tr = file_inode(filp)->i_private;
4595 char *mask_str;
4596 int len;
4597
4598 len = snprintf(NULL, 0, "%*pb\n",
4599 cpumask_pr_args(tr->tracing_cpumask)) + 1;
4600 mask_str = kmalloc(len, GFP_KERNEL);
4601 if (!mask_str)
4602 return -ENOMEM;
4603
4604 len = snprintf(mask_str, len, "%*pb\n",
4605 cpumask_pr_args(tr->tracing_cpumask));
4606 if (len >= count) {
4607 count = -EINVAL;
4608 goto out_err;
4609 }
4610 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4611
4612out_err:
4613 kfree(mask_str);
4614
4615 return count;
4616}
4617
4618static ssize_t
4619tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4620 size_t count, loff_t *ppos)
4621{
4622 struct trace_array *tr = file_inode(filp)->i_private;
4623 cpumask_var_t tracing_cpumask_new;
4624 int err, cpu;
4625
4626 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4627 return -ENOMEM;
4628
4629 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4630 if (err)
4631 goto err_unlock;
4632
4633 local_irq_disable();
4634 arch_spin_lock(&tr->max_lock);
4635 for_each_tracing_cpu(cpu) {
4636 /*
4637 * Increase/decrease the disabled counter if we are
4638 * about to flip a bit in the cpumask:
4639 */
4640 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4641 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4642 atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4643 ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4644 }
4645 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4646 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4647 atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4648 ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4649 }
4650 }
4651 arch_spin_unlock(&tr->max_lock);
4652 local_irq_enable();
4653
4654 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4655 free_cpumask_var(tracing_cpumask_new);
4656
4657 return count;
4658
4659err_unlock:
4660 free_cpumask_var(tracing_cpumask_new);
4661
4662 return err;
4663}
4664
4665static const struct file_operations tracing_cpumask_fops = {
4666 .open = tracing_open_generic_tr,
4667 .read = tracing_cpumask_read,
4668 .write = tracing_cpumask_write,
4669 .release = tracing_release_generic_tr,
4670 .llseek = generic_file_llseek,
4671};
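
/*
 * Illustrative usage of the tracing_cpumask file (the mask is parsed as a
 * hex cpumask by cpumask_parse_user()):
 *
 *	echo 3 > tracing_cpumask	# trace only CPUs 0 and 1
 *
 * CPUs dropped from the mask have recording disabled on their per-cpu
 * ring buffers by tracing_cpumask_write() above.
 */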
4672
4673static int tracing_trace_options_show(struct seq_file *m, void *v)
4674{
4675 struct tracer_opt *trace_opts;
4676 struct trace_array *tr = m->private;
4677 u32 tracer_flags;
4678 int i;
4679
4680 mutex_lock(&trace_types_lock);
4681 tracer_flags = tr->current_trace->flags->val;
4682 trace_opts = tr->current_trace->flags->opts;
4683
4684 for (i = 0; trace_options[i]; i++) {
4685 if (tr->trace_flags & (1 << i))
4686 seq_printf(m, "%s\n", trace_options[i]);
4687 else
4688 seq_printf(m, "no%s\n", trace_options[i]);
4689 }
4690
4691 for (i = 0; trace_opts[i].name; i++) {
4692 if (tracer_flags & trace_opts[i].bit)
4693 seq_printf(m, "%s\n", trace_opts[i].name);
4694 else
4695 seq_printf(m, "no%s\n", trace_opts[i].name);
4696 }
4697 mutex_unlock(&trace_types_lock);
4698
4699 return 0;
4700}
4701
4702static int __set_tracer_option(struct trace_array *tr,
4703 struct tracer_flags *tracer_flags,
4704 struct tracer_opt *opts, int neg)
4705{
4706 struct tracer *trace = tracer_flags->trace;
4707 int ret;
4708
4709 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4710 if (ret)
4711 return ret;
4712
4713 if (neg)
4714 tracer_flags->val &= ~opts->bit;
4715 else
4716 tracer_flags->val |= opts->bit;
4717 return 0;
4718}
4719
4720/* Try to assign a tracer specific option */
4721static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4722{
4723 struct tracer *trace = tr->current_trace;
4724 struct tracer_flags *tracer_flags = trace->flags;
4725 struct tracer_opt *opts = NULL;
4726 int i;
4727
4728 for (i = 0; tracer_flags->opts[i].name; i++) {
4729 opts = &tracer_flags->opts[i];
4730
4731 if (strcmp(cmp, opts->name) == 0)
4732 return __set_tracer_option(tr, trace->flags, opts, neg);
4733 }
4734
4735 return -EINVAL;
4736}
4737
4738/* Some tracers require overwrite to stay enabled */
4739int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4740{
4741 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4742 return -1;
4743
4744 return 0;
4745}
4746
4747int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4748{
4749 int *map;
4750
4751 if ((mask == TRACE_ITER_RECORD_TGID) ||
4752 (mask == TRACE_ITER_RECORD_CMD))
4753 lockdep_assert_held(&event_mutex);
4754
4755 /* do nothing if flag is already set */
4756 if (!!(tr->trace_flags & mask) == !!enabled)
4757 return 0;
4758
4759 /* Give the tracer a chance to approve the change */
4760 if (tr->current_trace->flag_changed)
4761 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4762 return -EINVAL;
4763
4764 if (enabled)
4765 tr->trace_flags |= mask;
4766 else
4767 tr->trace_flags &= ~mask;
4768
4769 if (mask == TRACE_ITER_RECORD_CMD)
4770 trace_event_enable_cmd_record(enabled);
4771
4772 if (mask == TRACE_ITER_RECORD_TGID) {
4773 if (!tgid_map) {
4774 tgid_map_max = pid_max;
4775 map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
4776 GFP_KERNEL);
4777
4778 /*
4779 * Pairs with smp_load_acquire() in
4780 * trace_find_tgid_ptr() to ensure that if it observes
4781 * the tgid_map we just allocated then it also observes
4782 * the corresponding tgid_map_max value.
4783 */
4784 smp_store_release(&tgid_map, map);
4785 }
4786 if (!tgid_map) {
4787 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4788 return -ENOMEM;
4789 }
4790
4791 trace_event_enable_tgid_record(enabled);
4792 }
4793
4794 if (mask == TRACE_ITER_EVENT_FORK)
4795 trace_event_follow_fork(tr, enabled);
4796
4797 if (mask == TRACE_ITER_FUNC_FORK)
4798 ftrace_pid_follow_fork(tr, enabled);
4799
4800 if (mask == TRACE_ITER_OVERWRITE) {
4801 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4802#ifdef CONFIG_TRACER_MAX_TRACE
4803 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4804#endif
4805 }
4806
4807 if (mask == TRACE_ITER_PRINTK) {
4808 trace_printk_start_stop_comm(enabled);
4809 trace_printk_control(enabled);
4810 }
4811
4812 return 0;
4813}
4814
4815static int trace_set_options(struct trace_array *tr, char *option)
4816{
4817 char *cmp;
4818 int neg = 0;
4819 int ret;
4820 size_t orig_len = strlen(option);
4821 int len;
4822
4823 cmp = strstrip(option);
4824
4825 len = str_has_prefix(cmp, "no");
4826 if (len)
4827 neg = 1;
4828
4829 cmp += len;
4830
4831 mutex_lock(&event_mutex);
4832 mutex_lock(&trace_types_lock);
4833
4834 ret = match_string(trace_options, -1, cmp);
4835 /* If no option could be set, test the specific tracer options */
4836 if (ret < 0)
4837 ret = set_tracer_option(tr, cmp, neg);
4838 else
4839 ret = set_tracer_flag(tr, 1 << ret, !neg);
4840
4841 mutex_unlock(&trace_types_lock);
4842 mutex_unlock(&event_mutex);
4843
4844 /*
4845 * If the first trailing whitespace is replaced with '\0' by strstrip,
4846 * turn it back into a space.
4847 */
4848 if (orig_len > strlen(option))
4849 option[strlen(option)] = ' ';
4850
4851 return ret;
4852}
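
/*
 * Examples of strings accepted by trace_set_options() (illustrative; the
 * exact option names depend on the kernel configuration and current tracer):
 *
 *	"print-parent"		- set a core trace flag
 *	"nooverwrite"		- clear a core trace flag
 *	"funcgraph-irqs"	- falls through to set_tracer_option() when the
 *				  flag is tracer specific (function_graph here)
 */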
4853
4854static void __init apply_trace_boot_options(void)
4855{
4856 char *buf = trace_boot_options_buf;
4857 char *option;
4858
4859 while (true) {
4860 option = strsep(&buf, ",");
4861
4862 if (!option)
4863 break;
4864
4865 if (*option)
4866 trace_set_options(&global_trace, option);
4867
4868 /* Put back the comma to allow this to be called again */
4869 if (buf)
4870 *(buf - 1) = ',';
4871 }
4872}
4873
4874static ssize_t
4875tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4876 size_t cnt, loff_t *ppos)
4877{
4878 struct seq_file *m = filp->private_data;
4879 struct trace_array *tr = m->private;
4880 char buf[64];
4881 int ret;
4882
4883 if (cnt >= sizeof(buf))
4884 return -EINVAL;
4885
4886 if (copy_from_user(buf, ubuf, cnt))
4887 return -EFAULT;
4888
4889 buf[cnt] = 0;
4890
4891 ret = trace_set_options(tr, buf);
4892 if (ret < 0)
4893 return ret;
4894
4895 *ppos += cnt;
4896
4897 return cnt;
4898}
4899
4900static int tracing_trace_options_open(struct inode *inode, struct file *file)
4901{
4902 struct trace_array *tr = inode->i_private;
4903 int ret;
4904
4905 ret = tracing_check_open_get_tr(tr);
4906 if (ret)
4907 return ret;
4908
4909 ret = single_open(file, tracing_trace_options_show, inode->i_private);
4910 if (ret < 0)
4911 trace_array_put(tr);
4912
4913 return ret;
4914}
4915
4916static const struct file_operations tracing_iter_fops = {
4917 .open = tracing_trace_options_open,
4918 .read = seq_read,
4919 .llseek = seq_lseek,
4920 .release = tracing_single_release_tr,
4921 .write = tracing_trace_options_write,
4922};
4923
4924static const char readme_msg[] =
4925 "tracing mini-HOWTO:\n\n"
4926 "# echo 0 > tracing_on : quick way to disable tracing\n"
4927 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4928 " Important files:\n"
4929 " trace\t\t\t- The static contents of the buffer\n"
4930 "\t\t\t To clear the buffer write into this file: echo > trace\n"
4931 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4932 " current_tracer\t- function and latency tracers\n"
4933 " available_tracers\t- list of configured tracers for current_tracer\n"
4934 " error_log\t- error log for failed commands (that support it)\n"
4935 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
4936 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
4937 " trace_clock\t\t-change the clock used to order events\n"
4938 " local: Per cpu clock but may not be synced across CPUs\n"
4939 " global: Synced across CPUs but slows tracing down.\n"
4940 " counter: Not a clock, but just an increment\n"
4941 " uptime: Jiffy counter from time of boot\n"
4942 " perf: Same clock that perf events use\n"
4943#ifdef CONFIG_X86_64
4944 " x86-tsc: TSC cycle counter\n"
4945#endif
4946 "\n timestamp_mode\t-view the mode used to timestamp events\n"
4947 " delta: Delta difference against a buffer-wide timestamp\n"
4948 " absolute: Absolute (standalone) timestamp\n"
4949 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4950 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4951 " tracing_cpumask\t- Limit which CPUs to trace\n"
4952 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4953 "\t\t\t Remove sub-buffer with rmdir\n"
4954 " trace_options\t\t- Set format or modify how tracing happens\n"
4955 "\t\t\t Disable an option by prefixing 'no' to the\n"
4956 "\t\t\t option name\n"
4957 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4958#ifdef CONFIG_DYNAMIC_FTRACE
4959 "\n available_filter_functions - list of functions that can be filtered on\n"
4960 " set_ftrace_filter\t- echo function name in here to only trace these\n"
4961 "\t\t\t functions\n"
4962 "\t accepts: func_full_name or glob-matching-pattern\n"
4963 "\t modules: Can select a group via module\n"
4964 "\t Format: :mod:<module-name>\n"
4965 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
4966 "\t triggers: a command to perform when function is hit\n"
4967 "\t Format: <function>:<trigger>[:count]\n"
4968 "\t trigger: traceon, traceoff\n"
4969 "\t\t enable_event:<system>:<event>\n"
4970 "\t\t disable_event:<system>:<event>\n"
4971#ifdef CONFIG_STACKTRACE
4972 "\t\t stacktrace\n"
4973#endif
4974#ifdef CONFIG_TRACER_SNAPSHOT
4975 "\t\t snapshot\n"
4976#endif
4977 "\t\t dump\n"
4978 "\t\t cpudump\n"
4979 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
4980 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
4981 "\t The first one will disable tracing every time do_fault is hit\n"
4982 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
4983 "\t The first time do trap is hit and it disables tracing, the\n"
4984 "\t counter will decrement to 2. If tracing is already disabled,\n"
4985 "\t the counter will not decrement. It only decrements when the\n"
4986 "\t trigger did work\n"
4987 "\t To remove trigger without count:\n"
4988 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
4989 "\t To remove trigger with a count:\n"
4990 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4991 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
4992 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4993 "\t modules: Can select a group via module command :mod:\n"
4994 "\t Does not accept triggers\n"
4995#endif /* CONFIG_DYNAMIC_FTRACE */
4996#ifdef CONFIG_FUNCTION_TRACER
4997 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4998 "\t\t (function)\n"
4999#endif
5000#ifdef CONFIG_FUNCTION_GRAPH_TRACER
5001 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5002 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5003 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5004#endif
5005#ifdef CONFIG_TRACER_SNAPSHOT
5006 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5007 "\t\t\t snapshot buffer. Read the contents for more\n"
5008 "\t\t\t information\n"
5009#endif
5010#ifdef CONFIG_STACK_TRACER
5011 " stack_trace\t\t- Shows the max stack trace when active\n"
5012 " stack_max_size\t- Shows current max stack size that was traced\n"
5013 "\t\t\t Write into this file to reset the max size (trigger a\n"
5014 "\t\t\t new trace)\n"
5015#ifdef CONFIG_DYNAMIC_FTRACE
5016 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5017 "\t\t\t traces\n"
5018#endif
5019#endif /* CONFIG_STACK_TRACER */
5020#ifdef CONFIG_DYNAMIC_EVENTS
5021 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5022 "\t\t\t Write into this file to define/undefine new trace events.\n"
5023#endif
5024#ifdef CONFIG_KPROBE_EVENTS
5025 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5026 "\t\t\t Write into this file to define/undefine new trace events.\n"
5027#endif
5028#ifdef CONFIG_UPROBE_EVENTS
5029 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5030 "\t\t\t Write into this file to define/undefine new trace events.\n"
5031#endif
5032#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5033 "\t accepts: event-definitions (one definition per line)\n"
5034 "\t Format: p[:[<group>/]<event>] <place> [<args>]\n"
5035 "\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5036#ifdef CONFIG_HIST_TRIGGERS
5037 "\t s:[synthetic/]<event> <field> [<field>]\n"
5038#endif
5039 "\t -:[<group>/]<event>\n"
5040#ifdef CONFIG_KPROBE_EVENTS
5041 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5042 "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5043#endif
5044#ifdef CONFIG_UPROBE_EVENTS
5045 " place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
5046#endif
5047 "\t args: <name>=fetcharg[:type]\n"
5048 "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5049#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5050 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5051#else
5052 "\t $stack<index>, $stack, $retval, $comm,\n"
5053#endif
5054 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5055 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5056 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5057 "\t <type>\\[<array-size>\\]\n"
5058#ifdef CONFIG_HIST_TRIGGERS
5059 "\t field: <stype> <name>;\n"
5060 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5061 "\t [unsigned] char/int/long\n"
5062#endif
5063#endif
5064 " events/\t\t- Directory containing all trace event subsystems:\n"
5065 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5066 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5067 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5068 "\t\t\t events\n"
5069 " filter\t\t- If set, only events passing filter are traced\n"
5070 " events/<system>/<event>/\t- Directory containing control files for\n"
5071 "\t\t\t <event>:\n"
5072 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5073 " filter\t\t- If set, only events passing filter are traced\n"
5074 " trigger\t\t- If set, a command to perform when event is hit\n"
5075 "\t Format: <trigger>[:count][if <filter>]\n"
5076 "\t trigger: traceon, traceoff\n"
5077 "\t enable_event:<system>:<event>\n"
5078 "\t disable_event:<system>:<event>\n"
5079#ifdef CONFIG_HIST_TRIGGERS
5080 "\t enable_hist:<system>:<event>\n"
5081 "\t disable_hist:<system>:<event>\n"
5082#endif
5083#ifdef CONFIG_STACKTRACE
5084 "\t\t stacktrace\n"
5085#endif
5086#ifdef CONFIG_TRACER_SNAPSHOT
5087 "\t\t snapshot\n"
5088#endif
5089#ifdef CONFIG_HIST_TRIGGERS
5090 "\t\t hist (see below)\n"
5091#endif
5092 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5093 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5094 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5095 "\t events/block/block_unplug/trigger\n"
5096 "\t The first disables tracing every time block_unplug is hit.\n"
5097 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5098 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5099 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5100 "\t Like function triggers, the counter is only decremented if it\n"
5101 "\t enabled or disabled tracing.\n"
5102 "\t To remove a trigger without a count:\n"
5103 "\t echo '!<trigger> > <system>/<event>/trigger\n"
5104 "\t To remove a trigger with a count:\n"
5105 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
5106 "\t Filters can be ignored when removing a trigger.\n"
5107#ifdef CONFIG_HIST_TRIGGERS
5108 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5109 "\t Format: hist:keys=<field1[,field2,...]>\n"
5110 "\t [:values=<field1[,field2,...]>]\n"
5111 "\t [:sort=<field1[,field2,...]>]\n"
5112 "\t [:size=#entries]\n"
5113 "\t [:pause][:continue][:clear]\n"
5114 "\t [:name=histname1]\n"
5115 "\t [:<handler>.<action>]\n"
5116 "\t [if <filter>]\n\n"
5117 "\t Note, special fields can be used as well:\n"
5118 "\t common_timestamp - to record current timestamp\n"
5119 "\t common_cpu - to record the CPU the event happened on\n"
5120 "\n"
5121 "\t When a matching event is hit, an entry is added to a hash\n"
5122 "\t table using the key(s) and value(s) named, and the value of a\n"
5123 "\t sum called 'hitcount' is incremented. Keys and values\n"
5124 "\t correspond to fields in the event's format description. Keys\n"
5125 "\t can be any field, or the special string 'stacktrace'.\n"
5126 "\t Compound keys consisting of up to two fields can be specified\n"
5127 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5128 "\t fields. Sort keys consisting of up to two fields can be\n"
5129 "\t specified using the 'sort' keyword. The sort direction can\n"
5130 "\t be modified by appending '.descending' or '.ascending' to a\n"
5131 "\t sort field. The 'size' parameter can be used to specify more\n"
5132 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5133 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5134 "\t its histogram data will be shared with other triggers of the\n"
5135 "\t same name, and trigger hits will update this common data.\n\n"
5136 "\t Reading the 'hist' file for the event will dump the hash\n"
5137 "\t table in its entirety to stdout. If there are multiple hist\n"
5138 "\t triggers attached to an event, there will be a table for each\n"
5139 "\t trigger in the output. The table displayed for a named\n"
5140 "\t trigger will be the same as any other instance having the\n"
5141 "\t same name. The default format used to display a given field\n"
5142 "\t can be modified by appending any of the following modifiers\n"
5143 "\t to the field name, as applicable:\n\n"
5144 "\t .hex display a number as a hex value\n"
5145 "\t .sym display an address as a symbol\n"
5146 "\t .sym-offset display an address as a symbol and offset\n"
5147 "\t .execname display a common_pid as a program name\n"
5148 "\t .syscall display a syscall id as a syscall name\n"
5149 "\t .log2 display log2 value rather than raw number\n"
5150 "\t .usecs display a common_timestamp in microseconds\n\n"
5151 "\t The 'pause' parameter can be used to pause an existing hist\n"
5152 "\t trigger or to start a hist trigger but not log any events\n"
5153 "\t until told to do so. 'continue' can be used to start or\n"
5154 "\t restart a paused hist trigger.\n\n"
5155 "\t The 'clear' parameter will clear the contents of a running\n"
5156 "\t hist trigger and leave its current paused/active state\n"
5157 "\t unchanged.\n\n"
5158 "\t The enable_hist and disable_hist triggers can be used to\n"
5159 "\t have one event conditionally start and stop another event's\n"
5160 "\t already-attached hist trigger. The syntax is analogous to\n"
5161 "\t the enable_event and disable_event triggers.\n\n"
5162 "\t Hist trigger handlers and actions are executed whenever a\n"
5163 "\t a histogram entry is added or updated. They take the form:\n\n"
5164 "\t <handler>.<action>\n\n"
5165 "\t The available handlers are:\n\n"
5166 "\t onmatch(matching.event) - invoke on addition or update\n"
5167 "\t onmax(var) - invoke if var exceeds current max\n"
5168 "\t onchange(var) - invoke action if var changes\n\n"
5169 "\t The available actions are:\n\n"
5170 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5171 "\t save(field,...) - save current event fields\n"
5172#ifdef CONFIG_TRACER_SNAPSHOT
5173 "\t snapshot() - snapshot the trace buffer\n"
5174#endif
5175#endif
5176;
5177
5178static ssize_t
5179tracing_readme_read(struct file *filp, char __user *ubuf,
5180 size_t cnt, loff_t *ppos)
5181{
5182 return simple_read_from_buffer(ubuf, cnt, ppos,
5183 readme_msg, strlen(readme_msg));
5184}
5185
5186static const struct file_operations tracing_readme_fops = {
5187 .open = tracing_open_generic,
5188 .read = tracing_readme_read,
5189 .llseek = generic_file_llseek,
5190};
5191
5192static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5193{
5194 int pid = ++(*pos);
5195
5196 return trace_find_tgid_ptr(pid);
5197}
5198
5199static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5200{
5201 int pid = *pos;
5202
5203 return trace_find_tgid_ptr(pid);
5204}
5205
5206static void saved_tgids_stop(struct seq_file *m, void *v)
5207{
5208}
5209
5210static int saved_tgids_show(struct seq_file *m, void *v)
5211{
5212 int *entry = (int *)v;
5213 int pid = entry - tgid_map;
5214 int tgid = *entry;
5215
5216 if (tgid == 0)
5217 return SEQ_SKIP;
5218
5219 seq_printf(m, "%d %d\n", pid, tgid);
5220 return 0;
5221}
5222
5223static const struct seq_operations tracing_saved_tgids_seq_ops = {
5224 .start = saved_tgids_start,
5225 .stop = saved_tgids_stop,
5226 .next = saved_tgids_next,
5227 .show = saved_tgids_show,
5228};
5229
5230static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5231{
5232 int ret;
5233
5234 ret = tracing_check_open_get_tr(NULL);
5235 if (ret)
5236 return ret;
5237
5238 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5239}
5240
5241
5242static const struct file_operations tracing_saved_tgids_fops = {
5243 .open = tracing_saved_tgids_open,
5244 .read = seq_read,
5245 .llseek = seq_lseek,
5246 .release = seq_release,
5247};
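
/*
 * The saved_tgids file only has entries once the record-tgid option has
 * allocated tgid_map (see set_tracer_flag() above). Illustrative usage:
 *
 *	echo 1 > options/record-tgid
 *	cat saved_tgids			# prints "<pid> <tgid>" pairs
 */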
5248
5249static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5250{
5251 unsigned int *ptr = v;
5252
5253 if (*pos || m->count)
5254 ptr++;
5255
5256 (*pos)++;
5257
5258 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5259 ptr++) {
5260 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5261 continue;
5262
5263 return ptr;
5264 }
5265
5266 return NULL;
5267}
5268
5269static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5270{
5271 void *v;
5272 loff_t l = 0;
5273
5274 preempt_disable();
5275 arch_spin_lock(&trace_cmdline_lock);
5276
5277 v = &savedcmd->map_cmdline_to_pid[0];
5278 while (l <= *pos) {
5279 v = saved_cmdlines_next(m, v, &l);
5280 if (!v)
5281 return NULL;
5282 }
5283
5284 return v;
5285}
5286
5287static void saved_cmdlines_stop(struct seq_file *m, void *v)
5288{
5289 arch_spin_unlock(&trace_cmdline_lock);
5290 preempt_enable();
5291}
5292
5293static int saved_cmdlines_show(struct seq_file *m, void *v)
5294{
5295 char buf[TASK_COMM_LEN];
5296 unsigned int *pid = v;
5297
5298 __trace_find_cmdline(*pid, buf);
5299 seq_printf(m, "%d %s\n", *pid, buf);
5300 return 0;
5301}
5302
5303static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5304 .start = saved_cmdlines_start,
5305 .next = saved_cmdlines_next,
5306 .stop = saved_cmdlines_stop,
5307 .show = saved_cmdlines_show,
5308};
5309
5310static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5311{
5312 int ret;
5313
5314 ret = tracing_check_open_get_tr(NULL);
5315 if (ret)
5316 return ret;
5317
5318 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5319}
5320
5321static const struct file_operations tracing_saved_cmdlines_fops = {
5322 .open = tracing_saved_cmdlines_open,
5323 .read = seq_read,
5324 .llseek = seq_lseek,
5325 .release = seq_release,
5326};
5327
5328static ssize_t
5329tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5330 size_t cnt, loff_t *ppos)
5331{
5332 char buf[64];
5333 int r;
5334
5335 preempt_disable();
5336 arch_spin_lock(&trace_cmdline_lock);
5337 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5338 arch_spin_unlock(&trace_cmdline_lock);
5339 preempt_enable();
5340
5341 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5342}
5343
5344static int tracing_resize_saved_cmdlines(unsigned int val)
5345{
5346 struct saved_cmdlines_buffer *s, *savedcmd_temp;
5347
5348 s = allocate_cmdlines_buffer(val);
5349 if (!s)
5350 return -ENOMEM;
5351
5352 preempt_disable();
5353 arch_spin_lock(&trace_cmdline_lock);
5354 savedcmd_temp = savedcmd;
5355 savedcmd = s;
5356 arch_spin_unlock(&trace_cmdline_lock);
5357 preempt_enable();
5358 free_saved_cmdlines_buffer(savedcmd_temp);
5359
5360 return 0;
5361}
5362
5363static ssize_t
5364tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5365 size_t cnt, loff_t *ppos)
5366{
5367 unsigned long val;
5368 int ret;
5369
5370 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5371 if (ret)
5372 return ret;
5373
5374 /* must have at least 1 entry and no more than PID_MAX_DEFAULT entries */
5375 if (!val || val > PID_MAX_DEFAULT)
5376 return -EINVAL;
5377
5378 ret = tracing_resize_saved_cmdlines((unsigned int)val);
5379 if (ret < 0)
5380 return ret;
5381
5382 *ppos += cnt;
5383
5384 return cnt;
5385}
5386
5387static const struct file_operations tracing_saved_cmdlines_size_fops = {
5388 .open = tracing_open_generic,
5389 .read = tracing_saved_cmdlines_size_read,
5390 .write = tracing_saved_cmdlines_size_write,
5391};
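
/*
 * Illustrative usage of saved_cmdlines_size; the write handler above caps
 * the value at PID_MAX_DEFAULT:
 *
 *	echo 1024 > saved_cmdlines_size		# cache up to 1024 comms
 */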
5392
5393#ifdef CONFIG_TRACE_EVAL_MAP_FILE
5394static union trace_eval_map_item *
5395update_eval_map(union trace_eval_map_item *ptr)
5396{
5397 if (!ptr->map.eval_string) {
5398 if (ptr->tail.next) {
5399 ptr = ptr->tail.next;
5400 /* Set ptr to the next real item (skip head) */
5401 ptr++;
5402 } else
5403 return NULL;
5404 }
5405 return ptr;
5406}
5407
5408static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5409{
5410 union trace_eval_map_item *ptr = v;
5411
5412 /*
5413 * Paranoid! If ptr points to end, we don't want to increment past it.
5414 * This really should never happen.
5415 */
5416 ptr = update_eval_map(ptr);
5417 if (WARN_ON_ONCE(!ptr))
5418 return NULL;
5419
5420 ptr++;
5421
5422 (*pos)++;
5423
5424 ptr = update_eval_map(ptr);
5425
5426 return ptr;
5427}
5428
5429static void *eval_map_start(struct seq_file *m, loff_t *pos)
5430{
5431 union trace_eval_map_item *v;
5432 loff_t l = 0;
5433
5434 mutex_lock(&trace_eval_mutex);
5435
5436 v = trace_eval_maps;
5437 if (v)
5438 v++;
5439
5440 while (v && l < *pos) {
5441 v = eval_map_next(m, v, &l);
5442 }
5443
5444 return v;
5445}
5446
5447static void eval_map_stop(struct seq_file *m, void *v)
5448{
5449 mutex_unlock(&trace_eval_mutex);
5450}
5451
5452static int eval_map_show(struct seq_file *m, void *v)
5453{
5454 union trace_eval_map_item *ptr = v;
5455
5456 seq_printf(m, "%s %ld (%s)\n",
5457 ptr->map.eval_string, ptr->map.eval_value,
5458 ptr->map.system);
5459
5460 return 0;
5461}
5462
5463static const struct seq_operations tracing_eval_map_seq_ops = {
5464 .start = eval_map_start,
5465 .next = eval_map_next,
5466 .stop = eval_map_stop,
5467 .show = eval_map_show,
5468};
5469
5470static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5471{
5472 int ret;
5473
5474 ret = tracing_check_open_get_tr(NULL);
5475 if (ret)
5476 return ret;
5477
5478 return seq_open(filp, &tracing_eval_map_seq_ops);
5479}
5480
5481static const struct file_operations tracing_eval_map_fops = {
5482 .open = tracing_eval_map_open,
5483 .read = seq_read,
5484 .llseek = seq_lseek,
5485 .release = seq_release,
5486};
5487
5488static inline union trace_eval_map_item *
5489trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5490{
5491 /* Return tail of array given the head */
5492 return ptr + ptr->head.length + 1;
5493}
5494
5495static void
5496trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5497 int len)
5498{
5499 struct trace_eval_map **stop;
5500 struct trace_eval_map **map;
5501 union trace_eval_map_item *map_array;
5502 union trace_eval_map_item *ptr;
5503
5504 stop = start + len;
5505
5506 /*
5507 * The trace_eval_maps contains the map plus a head and tail item,
5508 * where the head holds the module and length of array, and the
5509 * tail holds a pointer to the next list.
5510 */
5511 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5512 if (!map_array) {
5513 pr_warn("Unable to allocate trace eval mapping\n");
5514 return;
5515 }
5516
5517 mutex_lock(&trace_eval_mutex);
5518
5519 if (!trace_eval_maps)
5520 trace_eval_maps = map_array;
5521 else {
5522 ptr = trace_eval_maps;
5523 for (;;) {
5524 ptr = trace_eval_jmp_to_tail(ptr);
5525 if (!ptr->tail.next)
5526 break;
5527 ptr = ptr->tail.next;
5528
5529 }
5530 ptr->tail.next = map_array;
5531 }
5532 map_array->head.mod = mod;
5533 map_array->head.length = len;
5534 map_array++;
5535
5536 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5537 map_array->map = **map;
5538 map_array++;
5539 }
5540 memset(map_array, 0, sizeof(*map_array));
5541
5542 mutex_unlock(&trace_eval_mutex);
5543}
5544
5545static void trace_create_eval_file(struct dentry *d_tracer)
5546{
5547 trace_create_file("eval_map", 0444, d_tracer,
5548 NULL, &tracing_eval_map_fops);
5549}
5550
5551#else /* CONFIG_TRACE_EVAL_MAP_FILE */
5552static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5553static inline void trace_insert_eval_map_file(struct module *mod,
5554 struct trace_eval_map **start, int len) { }
5555#endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5556
5557static void trace_insert_eval_map(struct module *mod,
5558 struct trace_eval_map **start, int len)
5559{
5560 struct trace_eval_map **map;
5561
5562 if (len <= 0)
5563 return;
5564
5565 map = start;
5566
5567 trace_event_eval_update(map, len);
5568
5569 trace_insert_eval_map_file(mod, start, len);
5570}
5571
5572static ssize_t
5573tracing_set_trace_read(struct file *filp, char __user *ubuf,
5574 size_t cnt, loff_t *ppos)
5575{
5576 struct trace_array *tr = filp->private_data;
5577 char buf[MAX_TRACER_SIZE+2];
5578 int r;
5579
5580 mutex_lock(&trace_types_lock);
5581 r = sprintf(buf, "%s\n", tr->current_trace->name);
5582 mutex_unlock(&trace_types_lock);
5583
5584 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5585}
5586
5587int tracer_init(struct tracer *t, struct trace_array *tr)
5588{
5589 tracing_reset_online_cpus(&tr->trace_buffer);
5590 return t->init(tr);
5591}
5592
5593static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5594{
5595 int cpu;
5596
5597 for_each_tracing_cpu(cpu)
5598 per_cpu_ptr(buf->data, cpu)->entries = val;
5599}
5600
5601#ifdef CONFIG_TRACER_MAX_TRACE
5602/* resize @trace_buf's per-cpu entries to match @size_buf's per-cpu entries */
5603static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5604 struct trace_buffer *size_buf, int cpu_id)
5605{
5606 int cpu, ret = 0;
5607
5608 if (cpu_id == RING_BUFFER_ALL_CPUS) {
5609 for_each_tracing_cpu(cpu) {
5610 ret = ring_buffer_resize(trace_buf->buffer,
5611 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5612 if (ret < 0)
5613 break;
5614 per_cpu_ptr(trace_buf->data, cpu)->entries =
5615 per_cpu_ptr(size_buf->data, cpu)->entries;
5616 }
5617 } else {
5618 ret = ring_buffer_resize(trace_buf->buffer,
5619 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5620 if (ret == 0)
5621 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5622 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5623 }
5624
5625 return ret;
5626}
5627#endif /* CONFIG_TRACER_MAX_TRACE */
5628
5629static int __tracing_resize_ring_buffer(struct trace_array *tr,
5630 unsigned long size, int cpu)
5631{
5632 int ret;
5633
5634 /*
5635 * If kernel or user changes the size of the ring buffer
5636 * we use the size that was given, and we can forget about
5637 * expanding it later.
5638 */
5639 ring_buffer_expanded = true;
5640
5641 /* May be called before buffers are initialized */
5642 if (!tr->trace_buffer.buffer)
5643 return 0;
5644
5645 ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5646 if (ret < 0)
5647 return ret;
5648
5649#ifdef CONFIG_TRACER_MAX_TRACE
5650 if (!tr->current_trace->use_max_tr)
5651 goto out;
5652
5653 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5654 if (ret < 0) {
5655 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5656 &tr->trace_buffer, cpu);
5657 if (r < 0) {
5658 /*
5659 * AARGH! We are left with a different
5660 * size max buffer!!!!
5661 * The max buffer is our "snapshot" buffer.
5662 * When a tracer needs a snapshot (one of the
5663 * latency tracers), it swaps the max buffer
5664 * with the saved snapshot. We succeeded in
5665 * updating the size of the main buffer, but failed to
5666 * update the size of the max buffer. But when we tried
5667 * to reset the main buffer to the original size, we
5668 * failed there too. This is very unlikely to
5669 * happen, but if it does, warn and kill all
5670 * tracing.
5671 */
5672 WARN_ON(1);
5673 tracing_disabled = 1;
5674 }
5675 return ret;
5676 }
5677
5678 if (cpu == RING_BUFFER_ALL_CPUS)
5679 set_buffer_entries(&tr->max_buffer, size);
5680 else
5681 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5682
5683 out:
5684#endif /* CONFIG_TRACER_MAX_TRACE */
5685
5686 if (cpu == RING_BUFFER_ALL_CPUS)
5687 set_buffer_entries(&tr->trace_buffer, size);
5688 else
5689 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5690
5691 return ret;
5692}
5693
5694static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5695 unsigned long size, int cpu_id)
5696{
5697 int ret = size;
5698
5699 mutex_lock(&trace_types_lock);
5700
5701 if (cpu_id != RING_BUFFER_ALL_CPUS) {
5702 /* make sure this cpu is enabled in the mask */
5703 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5704 ret = -EINVAL;
5705 goto out;
5706 }
5707 }
5708
5709 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5710 if (ret < 0)
5711 ret = -ENOMEM;
5712
5713out:
5714 mutex_unlock(&trace_types_lock);
5715
5716 return ret;
5717}
5718
5719
5720/**
5721 * tracing_update_buffers - used by tracing facility to expand ring buffers
5722 *
5723 * To save memory when tracing is never used on a system that has it
5724 * configured in, the ring buffers start at a minimum size. Once a
5725 * user starts to use the tracing facility, they need to grow
5726 * to their default size.
5727 *
5728 * This function is to be called when a tracer is about to be used.
5729 */
5730int tracing_update_buffers(void)
5731{
5732 int ret = 0;
5733
5734 mutex_lock(&trace_types_lock);
5735 if (!ring_buffer_expanded)
5736 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5737 RING_BUFFER_ALL_CPUS);
5738 mutex_unlock(&trace_types_lock);
5739
5740 return ret;
5741}
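
/*
 * Minimal sketch of how a caller is expected to use tracing_update_buffers()
 * before enabling a tracer or event (exact call sites vary):
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 */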
5742
5743struct trace_option_dentry;
5744
5745static void
5746create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5747
5748/*
5749 * Used to clear out the tracer before deletion of an instance.
5750 * Must have trace_types_lock held.
5751 */
5752static void tracing_set_nop(struct trace_array *tr)
5753{
5754 if (tr->current_trace == &nop_trace)
5755 return;
5756
5757 tr->current_trace->enabled--;
5758
5759 if (tr->current_trace->reset)
5760 tr->current_trace->reset(tr);
5761
5762 tr->current_trace = &nop_trace;
5763}
5764
5765static bool tracer_options_updated;
5766
5767static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5768{
5769 /* Only enable if the directory has been created already. */
5770 if (!tr->dir)
5771 return;
5772
5773 /* Only create trace option files after update_tracer_options finishes */
5774 if (!tracer_options_updated)
5775 return;
5776
5777 create_trace_option_files(tr, t);
5778}
5779
5780static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5781{
5782 struct tracer *t;
5783#ifdef CONFIG_TRACER_MAX_TRACE
5784 bool had_max_tr;
5785#endif
5786 int ret = 0;
5787
5788 mutex_lock(&trace_types_lock);
5789
5790 if (!ring_buffer_expanded) {
5791 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5792 RING_BUFFER_ALL_CPUS);
5793 if (ret < 0)
5794 goto out;
5795 ret = 0;
5796 }
5797
5798 for (t = trace_types; t; t = t->next) {
5799 if (strcmp(t->name, buf) == 0)
5800 break;
5801 }
5802 if (!t) {
5803 ret = -EINVAL;
5804 goto out;
5805 }
5806 if (t == tr->current_trace)
5807 goto out;
5808
5809#ifdef CONFIG_TRACER_SNAPSHOT
5810 if (t->use_max_tr) {
5811 local_irq_disable();
5812 arch_spin_lock(&tr->max_lock);
5813 if (tr->cond_snapshot)
5814 ret = -EBUSY;
5815 arch_spin_unlock(&tr->max_lock);
5816 local_irq_enable();
5817 if (ret)
5818 goto out;
5819 }
5820#endif
5821 /* Some tracers won't work on kernel command line */
5822 if (system_state < SYSTEM_RUNNING && t->noboot) {
5823 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5824 t->name);
5825 goto out;
5826 }
5827
5828 /* Some tracers are only allowed for the top level buffer */
5829 if (!trace_ok_for_array(t, tr)) {
5830 ret = -EINVAL;
5831 goto out;
5832 }
5833
5834 /* If trace pipe files are being read, we can't change the tracer */
5835 if (tr->trace_ref) {
5836 ret = -EBUSY;
5837 goto out;
5838 }
5839
5840 trace_branch_disable();
5841
5842 tr->current_trace->enabled--;
5843
5844 if (tr->current_trace->reset)
5845 tr->current_trace->reset(tr);
5846
5847 /* Current trace needs to be nop_trace before synchronize_rcu */
5848 tr->current_trace = &nop_trace;
5849
5850#ifdef CONFIG_TRACER_MAX_TRACE
5851 had_max_tr = tr->allocated_snapshot;
5852
5853 if (had_max_tr && !t->use_max_tr) {
5854 /*
5855 * We need to make sure that the update_max_tr sees that
5856 * current_trace changed to nop_trace to keep it from
5857 * swapping the buffers after we resize it.
5858 * The update_max_tr is called with interrupts disabled,
5859 * so a synchronize_rcu() is sufficient.
5860 */
5861 synchronize_rcu();
5862 free_snapshot(tr);
5863 }
5864#endif
5865
5866#ifdef CONFIG_TRACER_MAX_TRACE
5867 if (t->use_max_tr && !had_max_tr) {
5868 ret = tracing_alloc_snapshot_instance(tr);
5869 if (ret < 0)
5870 goto out;
5871 }
5872#endif
5873
5874 if (t->init) {
5875 ret = tracer_init(t, tr);
5876 if (ret)
5877 goto out;
5878 }
5879
5880 tr->current_trace = t;
5881 tr->current_trace->enabled++;
5882 trace_branch_enable(tr);
5883 out:
5884 mutex_unlock(&trace_types_lock);
5885
5886 return ret;
5887}
5888
5889static ssize_t
5890tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5891 size_t cnt, loff_t *ppos)
5892{
5893 struct trace_array *tr = filp->private_data;
5894 char buf[MAX_TRACER_SIZE+1];
5895 int i;
5896 size_t ret;
5897 int err;
5898
5899 ret = cnt;
5900
5901 if (cnt > MAX_TRACER_SIZE)
5902 cnt = MAX_TRACER_SIZE;
5903
5904 if (copy_from_user(buf, ubuf, cnt))
5905 return -EFAULT;
5906
5907 buf[cnt] = 0;
5908
5909 /* strip trailing whitespace. */
5910 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5911 buf[i] = 0;
5912
5913 err = tracing_set_tracer(tr, buf);
5914 if (err)
5915 return err;
5916
5917 *ppos += ret;
5918
5919 return ret;
5920}
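
/*
 * Illustrative usage of the current_tracer file handled above (the tracer
 * names available depend on the kernel configuration):
 *
 *	echo function > current_tracer	# switch tracers
 *	echo nop > current_tracer	# back to the no-op tracer
 *
 * Trailing whitespace from the echo is stripped before the lookup.
 */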
5921
5922static ssize_t
5923tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5924 size_t cnt, loff_t *ppos)
5925{
5926 char buf[64];
5927 int r;
5928
5929 r = snprintf(buf, sizeof(buf), "%ld\n",
5930 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5931 if (r > sizeof(buf))
5932 r = sizeof(buf);
5933 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5934}
5935
5936static ssize_t
5937tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5938 size_t cnt, loff_t *ppos)
5939{
5940 unsigned long val;
5941 int ret;
5942
5943 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5944 if (ret)
5945 return ret;
5946
5947 *ptr = val * 1000;
5948
5949 return cnt;
5950}
5951
5952static ssize_t
5953tracing_thresh_read(struct file *filp, char __user *ubuf,
5954 size_t cnt, loff_t *ppos)
5955{
5956 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5957}
5958
5959static ssize_t
5960tracing_thresh_write(struct file *filp, const char __user *ubuf,
5961 size_t cnt, loff_t *ppos)
5962{
5963 struct trace_array *tr = filp->private_data;
5964 int ret;
5965
5966 mutex_lock(&trace_types_lock);
5967 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5968 if (ret < 0)
5969 goto out;
5970
5971 if (tr->current_trace->update_thresh) {
5972 ret = tr->current_trace->update_thresh(tr);
5973 if (ret < 0)
5974 goto out;
5975 }
5976
5977 ret = cnt;
5978out:
5979 mutex_unlock(&trace_types_lock);
5980
5981 return ret;
5982}
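/*
 * Usage example: the "tracing_thresh" file is read and written in
 * microseconds but stored in nanoseconds (tracing_nsecs_write()
 * multiplies by 1000).  Illustrative shell usage, assuming tracefs is
 * mounted at /sys/kernel/tracing:
 *
 *	echo 100 > /sys/kernel/tracing/tracing_thresh	(record latencies above 100 usecs)
 *	echo 0   > /sys/kernel/tracing/tracing_thresh	(disable the threshold)
 *
 * Tracers that support a threshold (their update_thresh() callback is
 * invoked above) pick up the new value for subsequent latencies.
 */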
5983
5984#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5985
5986static ssize_t
5987tracing_max_lat_read(struct file *filp, char __user *ubuf,
5988 size_t cnt, loff_t *ppos)
5989{
5990 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5991}
5992
5993static ssize_t
5994tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5995 size_t cnt, loff_t *ppos)
5996{
5997 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5998}
5999
6000#endif
6001
6002static int tracing_open_pipe(struct inode *inode, struct file *filp)
6003{
6004 struct trace_array *tr = inode->i_private;
6005 struct trace_iterator *iter;
6006 int ret;
6007
6008 ret = tracing_check_open_get_tr(tr);
6009 if (ret)
6010 return ret;
6011
6012 mutex_lock(&trace_types_lock);
6013
6014 /* create a buffer to store the information to pass to userspace */
6015 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6016 if (!iter) {
6017 ret = -ENOMEM;
6018 __trace_array_put(tr);
6019 goto out;
6020 }
6021
6022 trace_seq_init(&iter->seq);
6023 iter->trace = tr->current_trace;
6024
6025 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6026 ret = -ENOMEM;
6027 goto fail;
6028 }
6029
6030 /* trace pipe does not show start of buffer */
6031 cpumask_setall(iter->started);
6032
6033 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6034 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6035
6036 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6037 if (trace_clocks[tr->clock_id].in_ns)
6038 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6039
6040 iter->tr = tr;
6041 iter->trace_buffer = &tr->trace_buffer;
6042 iter->cpu_file = tracing_get_cpu(inode);
6043 mutex_init(&iter->mutex);
6044 filp->private_data = iter;
6045
6046 if (iter->trace->pipe_open)
6047 iter->trace->pipe_open(iter);
6048
6049 nonseekable_open(inode, filp);
6050
6051 tr->trace_ref++;
6052out:
6053 mutex_unlock(&trace_types_lock);
6054 return ret;
6055
6056fail:
6057 kfree(iter);
6058 __trace_array_put(tr);
6059 mutex_unlock(&trace_types_lock);
6060 return ret;
6061}
6062
6063static int tracing_release_pipe(struct inode *inode, struct file *file)
6064{
6065 struct trace_iterator *iter = file->private_data;
6066 struct trace_array *tr = inode->i_private;
6067
6068 mutex_lock(&trace_types_lock);
6069
6070 tr->trace_ref--;
6071
6072 if (iter->trace->pipe_close)
6073 iter->trace->pipe_close(iter);
6074
6075 mutex_unlock(&trace_types_lock);
6076
6077 free_cpumask_var(iter->started);
6078 mutex_destroy(&iter->mutex);
6079 kfree(iter);
6080
6081 trace_array_put(tr);
6082
6083 return 0;
6084}
6085
6086static __poll_t
6087trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6088{
6089 struct trace_array *tr = iter->tr;
6090
6091	/* Iterators are static; they should be either filled or empty */
6092 if (trace_buffer_iter(iter, iter->cpu_file))
6093 return EPOLLIN | EPOLLRDNORM;
6094
6095 if (tr->trace_flags & TRACE_ITER_BLOCK)
6096 /*
6097 * Always select as readable when in blocking mode
6098 */
6099 return EPOLLIN | EPOLLRDNORM;
6100 else
6101 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
6102 filp, poll_table, iter->tr->buffer_percent);
6103}
6104
6105static __poll_t
6106tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6107{
6108 struct trace_iterator *iter = filp->private_data;
6109
6110 return trace_poll(iter, filp, poll_table);
6111}
6112
6113/* Must be called with iter->mutex held. */
6114static int tracing_wait_pipe(struct file *filp)
6115{
6116 struct trace_iterator *iter = filp->private_data;
6117 int ret;
6118
6119 while (trace_empty(iter)) {
6120
6121 if ((filp->f_flags & O_NONBLOCK)) {
6122 return -EAGAIN;
6123 }
6124
6125 /*
6126		 * We block until we read something and tracing is disabled.
6127		 * We still block if tracing is disabled but we have not yet
6128		 * read anything. This allows a user to cat this file, and
6129		 * then enable tracing. But after we have read something,
6130		 * we give an EOF when tracing is disabled again.
6131 *
6132 * iter->pos will be 0 if we haven't read anything.
6133 */
6134 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6135 break;
6136
6137 mutex_unlock(&iter->mutex);
6138
6139 ret = wait_on_pipe(iter, 0);
6140
6141 mutex_lock(&iter->mutex);
6142
6143 if (ret)
6144 return ret;
6145 }
6146
6147 return 1;
6148}
6149
6150/*
6151 * Consumer reader.
6152 */
6153static ssize_t
6154tracing_read_pipe(struct file *filp, char __user *ubuf,
6155 size_t cnt, loff_t *ppos)
6156{
6157 struct trace_iterator *iter = filp->private_data;
6158 ssize_t sret;
6159
6160 /*
6161 * Avoid more than one consumer on a single file descriptor
6162	 * This is just a matter of trace coherency; the ring buffer itself
6163 * is protected.
6164 */
6165 mutex_lock(&iter->mutex);
6166
6167 /* return any leftover data */
6168 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6169 if (sret != -EBUSY)
6170 goto out;
6171
6172 trace_seq_init(&iter->seq);
6173
6174 if (iter->trace->read) {
6175 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6176 if (sret)
6177 goto out;
6178 }
6179
6180waitagain:
6181 sret = tracing_wait_pipe(filp);
6182 if (sret <= 0)
6183 goto out;
6184
6185 /* stop when tracing is finished */
6186 if (trace_empty(iter)) {
6187 sret = 0;
6188 goto out;
6189 }
6190
6191 if (cnt >= PAGE_SIZE)
6192 cnt = PAGE_SIZE - 1;
6193
6194 /* reset all but tr, trace, and overruns */
6195 memset(&iter->seq, 0,
6196 sizeof(struct trace_iterator) -
6197 offsetof(struct trace_iterator, seq));
6198 cpumask_clear(iter->started);
6199 trace_seq_init(&iter->seq);
6200 iter->pos = -1;
6201
6202 trace_event_read_lock();
6203 trace_access_lock(iter->cpu_file);
6204 while (trace_find_next_entry_inc(iter) != NULL) {
6205 enum print_line_t ret;
6206 int save_len = iter->seq.seq.len;
6207
6208 ret = print_trace_line(iter);
6209 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6210 /*
6211			 * If one print_trace_line() fills the entire trace_seq in one shot,
6212			 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6213			 * In this case, we need to consume it; otherwise the loop will peek
6214			 * this event again next time, resulting in an infinite loop.
6215 */
6216 if (save_len == 0) {
6217 iter->seq.full = 0;
6218 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6219 trace_consume(iter);
6220 break;
6221 }
6222
6223 /* In other cases, don't print partial lines */
6224 iter->seq.seq.len = save_len;
6225 break;
6226 }
6227 if (ret != TRACE_TYPE_NO_CONSUME)
6228 trace_consume(iter);
6229
6230 if (trace_seq_used(&iter->seq) >= cnt)
6231 break;
6232
6233 /*
6234		 * Setting the full flag means we reached the trace_seq buffer
6235		 * size and should have left via the partial-output condition above.
6236		 * One of the trace_seq_* functions is not being used properly.
6237 */
6238 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6239 iter->ent->type);
6240 }
6241 trace_access_unlock(iter->cpu_file);
6242 trace_event_read_unlock();
6243
6244 /* Now copy what we have to the user */
6245 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6246 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6247 trace_seq_init(&iter->seq);
6248
6249 /*
6250 * If there was nothing to send to user, in spite of consuming trace
6251 * entries, go back to wait for more entries.
6252 */
6253 if (sret == -EBUSY)
6254 goto waitagain;
6255
6256out:
6257 mutex_unlock(&iter->mutex);
6258
6259 return sret;
6260}
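/*
 * Usage example: tracing_read_pipe() is the read side of the
 * "trace_pipe" file, a consuming reader: every event returned here is
 * removed from the ring buffer, and reads block until data is available
 * unless the file was opened with O_NONBLOCK.  Illustrative shell usage,
 * assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *	cat /sys/kernel/tracing/trace_pipe
 *
 * Unlike the "trace" file, reading "trace_pipe" twice will not show the
 * same events, because each read consumes them.
 */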
6261
6262static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6263 unsigned int idx)
6264{
6265 __free_page(spd->pages[idx]);
6266}
6267
6268static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6269 .confirm = generic_pipe_buf_confirm,
6270 .release = generic_pipe_buf_release,
6271 .steal = generic_pipe_buf_steal,
6272 .get = generic_pipe_buf_get,
6273};
6274
6275static size_t
6276tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6277{
6278 size_t count;
6279 int save_len;
6280 int ret;
6281
6282 /* Seq buffer is page-sized, exactly what we need. */
6283 for (;;) {
6284 save_len = iter->seq.seq.len;
6285 ret = print_trace_line(iter);
6286
6287 if (trace_seq_has_overflowed(&iter->seq)) {
6288 iter->seq.seq.len = save_len;
6289 break;
6290 }
6291
6292 /*
6293 * This should not be hit, because it should only
6294 * be set if the iter->seq overflowed. But check it
6295 * anyway to be safe.
6296 */
6297 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6298 iter->seq.seq.len = save_len;
6299 break;
6300 }
6301
6302 count = trace_seq_used(&iter->seq) - save_len;
6303 if (rem < count) {
6304 rem = 0;
6305 iter->seq.seq.len = save_len;
6306 break;
6307 }
6308
6309 if (ret != TRACE_TYPE_NO_CONSUME)
6310 trace_consume(iter);
6311 rem -= count;
6312 if (!trace_find_next_entry_inc(iter)) {
6313 rem = 0;
6314 iter->ent = NULL;
6315 break;
6316 }
6317 }
6318
6319 return rem;
6320}
6321
6322static ssize_t tracing_splice_read_pipe(struct file *filp,
6323 loff_t *ppos,
6324 struct pipe_inode_info *pipe,
6325 size_t len,
6326 unsigned int flags)
6327{
6328 struct page *pages_def[PIPE_DEF_BUFFERS];
6329 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6330 struct trace_iterator *iter = filp->private_data;
6331 struct splice_pipe_desc spd = {
6332 .pages = pages_def,
6333 .partial = partial_def,
6334 .nr_pages = 0, /* This gets updated below. */
6335 .nr_pages_max = PIPE_DEF_BUFFERS,
6336 .ops = &tracing_pipe_buf_ops,
6337 .spd_release = tracing_spd_release_pipe,
6338 };
6339 ssize_t ret;
6340 size_t rem;
6341 unsigned int i;
6342
6343 if (splice_grow_spd(pipe, &spd))
6344 return -ENOMEM;
6345
6346 mutex_lock(&iter->mutex);
6347
6348 if (iter->trace->splice_read) {
6349 ret = iter->trace->splice_read(iter, filp,
6350 ppos, pipe, len, flags);
6351 if (ret)
6352 goto out_err;
6353 }
6354
6355 ret = tracing_wait_pipe(filp);
6356 if (ret <= 0)
6357 goto out_err;
6358
6359 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6360 ret = -EFAULT;
6361 goto out_err;
6362 }
6363
6364 trace_event_read_lock();
6365 trace_access_lock(iter->cpu_file);
6366
6367 /* Fill as many pages as possible. */
6368 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6369 spd.pages[i] = alloc_page(GFP_KERNEL);
6370 if (!spd.pages[i])
6371 break;
6372
6373 rem = tracing_fill_pipe_page(rem, iter);
6374
6375 /* Copy the data into the page, so we can start over. */
6376 ret = trace_seq_to_buffer(&iter->seq,
6377 page_address(spd.pages[i]),
6378 trace_seq_used(&iter->seq));
6379 if (ret < 0) {
6380 __free_page(spd.pages[i]);
6381 break;
6382 }
6383 spd.partial[i].offset = 0;
6384 spd.partial[i].len = trace_seq_used(&iter->seq);
6385
6386 trace_seq_init(&iter->seq);
6387 }
6388
6389 trace_access_unlock(iter->cpu_file);
6390 trace_event_read_unlock();
6391 mutex_unlock(&iter->mutex);
6392
6393 spd.nr_pages = i;
6394
6395 if (i)
6396 ret = splice_to_pipe(pipe, &spd);
6397 else
6398 ret = 0;
6399out:
6400 splice_shrink_spd(&spd);
6401 return ret;
6402
6403out_err:
6404 mutex_unlock(&iter->mutex);
6405 goto out;
6406}
6407
6408static ssize_t
6409tracing_entries_read(struct file *filp, char __user *ubuf,
6410 size_t cnt, loff_t *ppos)
6411{
6412 struct inode *inode = file_inode(filp);
6413 struct trace_array *tr = inode->i_private;
6414 int cpu = tracing_get_cpu(inode);
6415 char buf[64];
6416 int r = 0;
6417 ssize_t ret;
6418
6419 mutex_lock(&trace_types_lock);
6420
6421 if (cpu == RING_BUFFER_ALL_CPUS) {
6422 int cpu, buf_size_same;
6423 unsigned long size;
6424
6425 size = 0;
6426 buf_size_same = 1;
6427 /* check if all cpu sizes are same */
6428 for_each_tracing_cpu(cpu) {
6429 /* fill in the size from first enabled cpu */
6430 if (size == 0)
6431 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6432 if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6433 buf_size_same = 0;
6434 break;
6435 }
6436 }
6437
6438 if (buf_size_same) {
6439 if (!ring_buffer_expanded)
6440 r = sprintf(buf, "%lu (expanded: %lu)\n",
6441 size >> 10,
6442 trace_buf_size >> 10);
6443 else
6444 r = sprintf(buf, "%lu\n", size >> 10);
6445 } else
6446 r = sprintf(buf, "X\n");
6447 } else
6448 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6449
6450 mutex_unlock(&trace_types_lock);
6451
6452 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6453 return ret;
6454}
6455
6456static ssize_t
6457tracing_entries_write(struct file *filp, const char __user *ubuf,
6458 size_t cnt, loff_t *ppos)
6459{
6460 struct inode *inode = file_inode(filp);
6461 struct trace_array *tr = inode->i_private;
6462 unsigned long val;
6463 int ret;
6464
6465 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6466 if (ret)
6467 return ret;
6468
6469 /* must have at least 1 entry */
6470 if (!val)
6471 return -EINVAL;
6472
6473 /* value is in KB */
6474 val <<= 10;
6475 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6476 if (ret < 0)
6477 return ret;
6478
6479 *ppos += cnt;
6480
6481 return cnt;
6482}
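/*
 * Usage example: "buffer_size_kb" takes the per-CPU ring buffer size in
 * kilobytes (the value is shifted left by 10 above before resizing).
 * Illustrative shell usage, assuming tracefs is mounted at
 * /sys/kernel/tracing:
 *
 *	echo 4096 > /sys/kernel/tracing/buffer_size_kb			(all CPUs)
 *	echo 4096 > /sys/kernel/tracing/per_cpu/cpu1/buffer_size_kb	(CPU 1 only)
 *
 * Reading the file back reports the current size, with "(expanded: ...)"
 * appended while the ring buffer still has its small boot-time size.
 */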
6483
6484static ssize_t
6485tracing_total_entries_read(struct file *filp, char __user *ubuf,
6486 size_t cnt, loff_t *ppos)
6487{
6488 struct trace_array *tr = filp->private_data;
6489 char buf[64];
6490 int r, cpu;
6491 unsigned long size = 0, expanded_size = 0;
6492
6493 mutex_lock(&trace_types_lock);
6494 for_each_tracing_cpu(cpu) {
6495 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6496 if (!ring_buffer_expanded)
6497 expanded_size += trace_buf_size >> 10;
6498 }
6499 if (ring_buffer_expanded)
6500 r = sprintf(buf, "%lu\n", size);
6501 else
6502 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6503 mutex_unlock(&trace_types_lock);
6504
6505 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6506}
6507
6508static ssize_t
6509tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6510 size_t cnt, loff_t *ppos)
6511{
6512 /*
6513	 * There is no need to read what the user has written; this function
6514	 * exists just to make sure that there is no error when "echo" is used.
6515 */
6516
6517 *ppos += cnt;
6518
6519 return cnt;
6520}
6521
6522static int
6523tracing_free_buffer_release(struct inode *inode, struct file *filp)
6524{
6525 struct trace_array *tr = inode->i_private;
6526
6527 /* disable tracing ? */
6528 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6529 tracer_tracing_off(tr);
6530 /* resize the ring buffer to 0 */
6531 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6532
6533 trace_array_put(tr);
6534
6535 return 0;
6536}
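/*
 * Usage example: "free_buffer" does its work on release.  Closing the
 * file shrinks the ring buffer to zero, and tracing is switched off
 * first when the TRACE_ITER_STOP_ON_FREE option is set.  An illustrative
 * shell sketch, assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *	exec 3> /sys/kernel/tracing/free_buffer
 *	... run the workload being traced ...
 *	exec 3>&-	(closing the descriptor frees the buffer)
 *
 * This lets a monitoring task hold the buffer open and have it torn
 * down automatically if that task exits.
 */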
6537
6538static ssize_t
6539tracing_mark_write(struct file *filp, const char __user *ubuf,
6540 size_t cnt, loff_t *fpos)
6541{
6542 struct trace_array *tr = filp->private_data;
6543 struct ring_buffer_event *event;
6544 enum event_trigger_type tt = ETT_NONE;
6545 struct ring_buffer *buffer;
6546 struct print_entry *entry;
6547 unsigned long irq_flags;
6548 ssize_t written;
6549 int size;
6550 int len;
6551
6552/* Used in tracing_mark_raw_write() as well */
6553#define FAULTED_STR "<faulted>"
6554#define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6555
6556 if (tracing_disabled)
6557 return -EINVAL;
6558
6559 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6560 return -EINVAL;
6561
6562 if (cnt > TRACE_BUF_SIZE)
6563 cnt = TRACE_BUF_SIZE;
6564
6565 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6566
6567 local_save_flags(irq_flags);
6568 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6569
6570 /* If less than "<faulted>", then make sure we can still add that */
6571 if (cnt < FAULTED_SIZE)
6572 size += FAULTED_SIZE - cnt;
6573
6574 buffer = tr->trace_buffer.buffer;
6575 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6576 irq_flags, preempt_count());
6577 if (unlikely(!event))
6578 /* Ring buffer disabled, return as if not open for write */
6579 return -EBADF;
6580
6581 entry = ring_buffer_event_data(event);
6582 entry->ip = _THIS_IP_;
6583
6584 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6585 if (len) {
6586 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6587 cnt = FAULTED_SIZE;
6588 written = -EFAULT;
6589 } else
6590 written = cnt;
6591 len = cnt;
6592
6593 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6594 /* do not add \n before testing triggers, but add \0 */
6595 entry->buf[cnt] = '\0';
6596 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6597 }
6598
6599 if (entry->buf[cnt - 1] != '\n') {
6600 entry->buf[cnt] = '\n';
6601 entry->buf[cnt + 1] = '\0';
6602 } else
6603 entry->buf[cnt] = '\0';
6604
6605 __buffer_unlock_commit(buffer, event);
6606
6607 if (tt)
6608 event_triggers_post_call(tr->trace_marker_file, tt);
6609
6610 if (written > 0)
6611 *fpos += written;
6612
6613 return written;
6614}
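/*
 * Usage example: tracing_mark_write() backs the "trace_marker" file,
 * which lets userspace inject annotation strings into the trace stream.
 * An illustrative userspace sketch, assuming tracefs is mounted at
 * /sys/kernel/tracing:
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int marker_fd = -1;
 *
 *	void trace_mark(const char *msg)
 *	{
 *		if (marker_fd < 0)
 *			marker_fd = open("/sys/kernel/tracing/trace_marker",
 *					 O_WRONLY);
 *		if (marker_fd >= 0)
 *			write(marker_fd, msg, strlen(msg));
 *	}
 *
 * Each write becomes one TRACE_PRINT event; writes longer than
 * TRACE_BUF_SIZE are truncated, and a trailing newline is added when the
 * message does not already end with one.
 */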
6615
6616/* Limit it for now to 3K (including tag) */
6617#define RAW_DATA_MAX_SIZE (1024*3)
6618
6619static ssize_t
6620tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6621 size_t cnt, loff_t *fpos)
6622{
6623 struct trace_array *tr = filp->private_data;
6624 struct ring_buffer_event *event;
6625 struct ring_buffer *buffer;
6626 struct raw_data_entry *entry;
6627 unsigned long irq_flags;
6628 ssize_t written;
6629 int size;
6630 int len;
6631
6632#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6633
6634 if (tracing_disabled)
6635 return -EINVAL;
6636
6637 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6638 return -EINVAL;
6639
6640 /* The marker must at least have a tag id */
6641 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6642 return -EINVAL;
6643
6644 if (cnt > TRACE_BUF_SIZE)
6645 cnt = TRACE_BUF_SIZE;
6646
6647 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6648
6649 local_save_flags(irq_flags);
6650 size = sizeof(*entry) + cnt;
6651 if (cnt < FAULT_SIZE_ID)
6652 size += FAULT_SIZE_ID - cnt;
6653
6654 buffer = tr->trace_buffer.buffer;
6655 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6656 irq_flags, preempt_count());
6657 if (!event)
6658 /* Ring buffer disabled, return as if not open for write */
6659 return -EBADF;
6660
6661 entry = ring_buffer_event_data(event);
6662
6663 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6664 if (len) {
6665 entry->id = -1;
6666 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6667 written = -EFAULT;
6668 } else
6669 written = cnt;
6670
6671 __buffer_unlock_commit(buffer, event);
6672
6673 if (written > 0)
6674 *fpos += written;
6675
6676 return written;
6677}
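/*
 * Usage example: "trace_marker_raw" expects binary data that starts with
 * an integer tag id followed by an arbitrary payload, at most
 * RAW_DATA_MAX_SIZE bytes in total.  An illustrative userspace sketch;
 * the path assumes tracefs is mounted at /sys/kernel/tracing, and the
 * payload layout after the id is up to whatever tool reads it back out:
 *
 *	#include <fcntl.h>
 *	#include <stdint.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int raw_mark(int fd, uint32_t id, const void *data, size_t len)
 *	{
 *		char buf[sizeof(id) + 64];
 *
 *		if (len > sizeof(buf) - sizeof(id))
 *			return -1;
 *		memcpy(buf, &id, sizeof(id));
 *		memcpy(buf + sizeof(id), data, len);
 *		return write(fd, buf, sizeof(id) + len) < 0 ? -1 : 0;
 *	}
 *
 * The fd would come from opening /sys/kernel/tracing/trace_marker_raw
 * for writing; writes shorter than sizeof(unsigned int) are rejected
 * with -EINVAL by the code above.
 */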
6678
6679static int tracing_clock_show(struct seq_file *m, void *v)
6680{
6681 struct trace_array *tr = m->private;
6682 int i;
6683
6684 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6685 seq_printf(m,
6686 "%s%s%s%s", i ? " " : "",
6687 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6688 i == tr->clock_id ? "]" : "");
6689 seq_putc(m, '\n');
6690
6691 return 0;
6692}
6693
6694int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6695{
6696 int i;
6697
6698 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6699 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6700 break;
6701 }
6702 if (i == ARRAY_SIZE(trace_clocks))
6703 return -EINVAL;
6704
6705 mutex_lock(&trace_types_lock);
6706
6707 tr->clock_id = i;
6708
6709 ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6710
6711 /*
6712 * New clock may not be consistent with the previous clock.
6713 * Reset the buffer so that it doesn't have incomparable timestamps.
6714 */
6715 tracing_reset_online_cpus(&tr->trace_buffer);
6716
6717#ifdef CONFIG_TRACER_MAX_TRACE
6718 if (tr->max_buffer.buffer)
6719 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6720 tracing_reset_online_cpus(&tr->max_buffer);
6721#endif
6722
6723 mutex_unlock(&trace_types_lock);
6724
6725 return 0;
6726}
6727
6728static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6729 size_t cnt, loff_t *fpos)
6730{
6731 struct seq_file *m = filp->private_data;
6732 struct trace_array *tr = m->private;
6733 char buf[64];
6734 const char *clockstr;
6735 int ret;
6736
6737 if (cnt >= sizeof(buf))
6738 return -EINVAL;
6739
6740 if (copy_from_user(buf, ubuf, cnt))
6741 return -EFAULT;
6742
6743 buf[cnt] = 0;
6744
6745 clockstr = strstrip(buf);
6746
6747 ret = tracing_set_clock(tr, clockstr);
6748 if (ret)
6749 return ret;
6750
6751 *fpos += cnt;
6752
6753 return cnt;
6754}
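/*
 * Usage example: "trace_clock" selects the clock used to timestamp new
 * events.  Reading it lists the available clocks with the current one in
 * brackets; writing a name switches clocks and, as tracing_set_clock()
 * notes above, resets the buffers because timestamps from different
 * clocks are not comparable.  Illustrative shell usage, assuming tracefs
 * is mounted at /sys/kernel/tracing (the exact clock list depends on the
 * kernel and architecture):
 *
 *	cat /sys/kernel/tracing/trace_clock
 *	[local] global counter uptime perf mono mono_raw boot ...
 *	echo mono > /sys/kernel/tracing/trace_clock
 */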
6755
6756static int tracing_clock_open(struct inode *inode, struct file *file)
6757{
6758 struct trace_array *tr = inode->i_private;
6759 int ret;
6760
6761 ret = tracing_check_open_get_tr(tr);
6762 if (ret)
6763 return ret;
6764
6765 ret = single_open(file, tracing_clock_show, inode->i_private);
6766 if (ret < 0)
6767 trace_array_put(tr);
6768
6769 return ret;
6770}
6771
6772static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6773{
6774 struct trace_array *tr = m->private;
6775
6776 mutex_lock(&trace_types_lock);
6777
6778 if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6779 seq_puts(m, "delta [absolute]\n");
6780 else
6781 seq_puts(m, "[delta] absolute\n");
6782
6783 mutex_unlock(&trace_types_lock);
6784
6785 return 0;
6786}
6787
6788static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6789{
6790 struct trace_array *tr = inode->i_private;
6791 int ret;
6792
6793 ret = tracing_check_open_get_tr(tr);
6794 if (ret)
6795 return ret;
6796
6797 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6798 if (ret < 0)
6799 trace_array_put(tr);
6800
6801 return ret;
6802}
6803
6804int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6805{
6806 int ret = 0;
6807
6808 mutex_lock(&trace_types_lock);
6809
6810 if (abs && tr->time_stamp_abs_ref++)
6811 goto out;
6812
6813 if (!abs) {
6814 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6815 ret = -EINVAL;
6816 goto out;
6817 }
6818
6819 if (--tr->time_stamp_abs_ref)
6820 goto out;
6821 }
6822
6823 ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6824
6825#ifdef CONFIG_TRACER_MAX_TRACE
6826 if (tr->max_buffer.buffer)
6827 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6828#endif
6829 out:
6830 mutex_unlock(&trace_types_lock);
6831
6832 return ret;
6833}
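/*
 * Usage example: the "timestamp_mode" file served by
 * trace_time_stamp_mode_fops below is read-only and simply reports which
 * mode the ring buffer is in; the mode itself is switched through
 * tracing_set_time_stamp_abs() (used, for example, by hist triggers).
 * Illustrative shell usage, assuming tracefs is mounted at
 * /sys/kernel/tracing:
 *
 *	cat /sys/kernel/tracing/timestamp_mode
 *	[delta] absolute
 */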
6834
6835struct ftrace_buffer_info {
6836 struct trace_iterator iter;
6837 void *spare;
6838 unsigned int spare_cpu;
6839 unsigned int read;
6840};
6841
6842#ifdef CONFIG_TRACER_SNAPSHOT
6843static int tracing_snapshot_open(struct inode *inode, struct file *file)
6844{
6845 struct trace_array *tr = inode->i_private;
6846 struct trace_iterator *iter;
6847 struct seq_file *m;
6848 int ret;
6849
6850 ret = tracing_check_open_get_tr(tr);
6851 if (ret)
6852 return ret;
6853
6854 if (file->f_mode & FMODE_READ) {
6855 iter = __tracing_open(inode, file, true);
6856 if (IS_ERR(iter))
6857 ret = PTR_ERR(iter);
6858 } else {
6859 /* Writes still need the seq_file to hold the private data */
6860 ret = -ENOMEM;
6861 m = kzalloc(sizeof(*m), GFP_KERNEL);
6862 if (!m)
6863 goto out;
6864 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6865 if (!iter) {
6866 kfree(m);
6867 goto out;
6868 }
6869 ret = 0;
6870
6871 iter->tr = tr;
6872 iter->trace_buffer = &tr->max_buffer;
6873 iter->cpu_file = tracing_get_cpu(inode);
6874 m->private = iter;
6875 file->private_data = m;
6876 }
6877out:
6878 if (ret < 0)
6879 trace_array_put(tr);
6880
6881 return ret;
6882}
6883
6884static void tracing_swap_cpu_buffer(void *tr)
6885{
6886 update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
6887}
6888
6889static ssize_t
6890tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6891 loff_t *ppos)
6892{
6893 struct seq_file *m = filp->private_data;
6894 struct trace_iterator *iter = m->private;
6895 struct trace_array *tr = iter->tr;
6896 unsigned long val;
6897 int ret;
6898
6899 ret = tracing_update_buffers();
6900 if (ret < 0)
6901 return ret;
6902
6903 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6904 if (ret)
6905 return ret;
6906
6907 mutex_lock(&trace_types_lock);
6908
6909 if (tr->current_trace->use_max_tr) {
6910 ret = -EBUSY;
6911 goto out;
6912 }
6913
6914 local_irq_disable();
6915 arch_spin_lock(&tr->max_lock);
6916 if (tr->cond_snapshot)
6917 ret = -EBUSY;
6918 arch_spin_unlock(&tr->max_lock);
6919 local_irq_enable();
6920 if (ret)
6921 goto out;
6922
6923 switch (val) {
6924 case 0:
6925 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6926 ret = -EINVAL;
6927 break;
6928 }
6929 if (tr->allocated_snapshot)
6930 free_snapshot(tr);
6931 break;
6932 case 1:
6933/* Only allow per-cpu swap if the ring buffer supports it */
6934#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6935 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6936 ret = -EINVAL;
6937 break;
6938 }
6939#endif
6940 if (tr->allocated_snapshot)
6941 ret = resize_buffer_duplicate_size(&tr->max_buffer,
6942 &tr->trace_buffer, iter->cpu_file);
6943 else
6944 ret = tracing_alloc_snapshot_instance(tr);
6945 if (ret < 0)
6946 break;
6947 /* Now, we're going to swap */
6948 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
6949 local_irq_disable();
6950 update_max_tr(tr, current, smp_processor_id(), NULL);
6951 local_irq_enable();
6952 } else {
6953 smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
6954 (void *)tr, 1);
6955 }
6956 break;
6957 default:
6958 if (tr->allocated_snapshot) {
6959 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6960 tracing_reset_online_cpus(&tr->max_buffer);
6961 else
6962 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6963 }
6964 break;
6965 }
6966
6967 if (ret >= 0) {
6968 *ppos += cnt;
6969 ret = cnt;
6970 }
6971out:
6972 mutex_unlock(&trace_types_lock);
6973 return ret;
6974}
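/*
 * Usage example: the values written to the "snapshot" file map onto the
 * switch statement above.  Illustrative shell usage, assuming tracefs is
 * mounted at /sys/kernel/tracing and CONFIG_TRACER_SNAPSHOT is enabled:
 *
 *	echo 1 > /sys/kernel/tracing/snapshot	(allocate if needed and swap
 *						 the snapshot with the live buffer)
 *	cat /sys/kernel/tracing/snapshot	(read the captured data)
 *	echo 2 > /sys/kernel/tracing/snapshot	(any other value: clear the
 *						 snapshot contents)
 *	echo 0 > /sys/kernel/tracing/snapshot	(free the snapshot buffer)
 *
 * The per_cpu/cpuN/snapshot files take the same values but act on a
 * single CPU, subject to the restrictions checked above (freeing, and
 * swapping without CONFIG_RING_BUFFER_ALLOW_SWAP, are only allowed for
 * the all-CPUs file).
 */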
6975
6976static int tracing_snapshot_release(struct inode *inode, struct file *file)
6977{
6978 struct seq_file *m = file->private_data;
6979 int ret;
6980
6981 ret = tracing_release(inode, file);
6982
6983 if (file->f_mode & FMODE_READ)
6984 return ret;
6985
6986 /* If write only, the seq_file is just a stub */
6987 if (m)
6988 kfree(m->private);
6989 kfree(m);
6990
6991 return 0;
6992}
6993
6994static int tracing_buffers_open(struct inode *inode, struct file *filp);
6995static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6996 size_t count, loff_t *ppos);
6997static int tracing_buffers_release(struct inode *inode, struct file *file);
6998static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6999 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7000
7001static int snapshot_raw_open(struct inode *inode, struct file *filp)
7002{
7003 struct ftrace_buffer_info *info;
7004 int ret;
7005
7006 /* The following checks for tracefs lockdown */
7007 ret = tracing_buffers_open(inode, filp);
7008 if (ret < 0)
7009 return ret;
7010
7011 info = filp->private_data;
7012
7013 if (info->iter.trace->use_max_tr) {
7014 tracing_buffers_release(inode, filp);
7015 return -EBUSY;
7016 }
7017
7018 info->iter.snapshot = true;
7019 info->iter.trace_buffer = &info->iter.tr->max_buffer;
7020
7021 return ret;
7022}
7023
7024#endif /* CONFIG_TRACER_SNAPSHOT */
7025
7026
7027static const struct file_operations tracing_thresh_fops = {
7028 .open = tracing_open_generic,
7029 .read = tracing_thresh_read,
7030 .write = tracing_thresh_write,
7031 .llseek = generic_file_llseek,
7032};
7033
7034#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7035static const struct file_operations tracing_max_lat_fops = {
7036 .open = tracing_open_generic,
7037 .read = tracing_max_lat_read,
7038 .write = tracing_max_lat_write,
7039 .llseek = generic_file_llseek,
7040};
7041#endif
7042
7043static const struct file_operations set_tracer_fops = {
7044 .open = tracing_open_generic_tr,
7045 .read = tracing_set_trace_read,
7046 .write = tracing_set_trace_write,
7047 .llseek = generic_file_llseek,
7048 .release = tracing_release_generic_tr,
7049};
7050
7051static const struct file_operations tracing_pipe_fops = {
7052 .open = tracing_open_pipe,
7053 .poll = tracing_poll_pipe,
7054 .read = tracing_read_pipe,
7055 .splice_read = tracing_splice_read_pipe,
7056 .release = tracing_release_pipe,
7057 .llseek = no_llseek,
7058};
7059
7060static const struct file_operations tracing_entries_fops = {
7061 .open = tracing_open_generic_tr,
7062 .read = tracing_entries_read,
7063 .write = tracing_entries_write,
7064 .llseek = generic_file_llseek,
7065 .release = tracing_release_generic_tr,
7066};
7067
7068static const struct file_operations tracing_total_entries_fops = {
7069 .open = tracing_open_generic_tr,
7070 .read = tracing_total_entries_read,
7071 .llseek = generic_file_llseek,
7072 .release = tracing_release_generic_tr,
7073};
7074
7075static const struct file_operations tracing_free_buffer_fops = {
7076 .open = tracing_open_generic_tr,
7077 .write = tracing_free_buffer_write,
7078 .release = tracing_free_buffer_release,
7079};
7080
7081static const struct file_operations tracing_mark_fops = {
7082 .open = tracing_open_generic_tr,
7083 .write = tracing_mark_write,
7084 .llseek = generic_file_llseek,
7085 .release = tracing_release_generic_tr,
7086};
7087
7088static const struct file_operations tracing_mark_raw_fops = {
7089 .open = tracing_open_generic_tr,
7090 .write = tracing_mark_raw_write,
7091 .llseek = generic_file_llseek,
7092 .release = tracing_release_generic_tr,
7093};
7094
7095static const struct file_operations trace_clock_fops = {
7096 .open = tracing_clock_open,
7097 .read = seq_read,
7098 .llseek = seq_lseek,
7099 .release = tracing_single_release_tr,
7100 .write = tracing_clock_write,
7101};
7102
7103static const struct file_operations trace_time_stamp_mode_fops = {
7104 .open = tracing_time_stamp_mode_open,
7105 .read = seq_read,
7106 .llseek = seq_lseek,
7107 .release = tracing_single_release_tr,
7108};
7109
7110#ifdef CONFIG_TRACER_SNAPSHOT
7111static const struct file_operations snapshot_fops = {
7112 .open = tracing_snapshot_open,
7113 .read = seq_read,
7114 .write = tracing_snapshot_write,
7115 .llseek = tracing_lseek,
7116 .release = tracing_snapshot_release,
7117};
7118
7119static const struct file_operations snapshot_raw_fops = {
7120 .open = snapshot_raw_open,
7121 .read = tracing_buffers_read,
7122 .release = tracing_buffers_release,
7123 .splice_read = tracing_buffers_splice_read,
7124 .llseek = no_llseek,
7125};
7126
7127#endif /* CONFIG_TRACER_SNAPSHOT */
7128
7129#define TRACING_LOG_ERRS_MAX 8
7130#define TRACING_LOG_LOC_MAX 128
7131
7132#define CMD_PREFIX " Command: "
7133
7134struct err_info {
7135 const char **errs; /* ptr to loc-specific array of err strings */
7136 u8 type; /* index into errs -> specific err string */
7137 u8 pos; /* MAX_FILTER_STR_VAL = 256 */
7138 u64 ts;
7139};
7140
7141struct tracing_log_err {
7142 struct list_head list;
7143 struct err_info info;
7144 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7145 char cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7146};
7147
7148static DEFINE_MUTEX(tracing_err_log_lock);
7149
7150static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7151{
7152 struct tracing_log_err *err;
7153
7154 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7155 err = kzalloc(sizeof(*err), GFP_KERNEL);
7156 if (!err)
7157 err = ERR_PTR(-ENOMEM);
7158 else
7159 tr->n_err_log_entries++;
7160
7161 return err;
7162 }
7163
7164 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7165 list_del(&err->list);
7166
7167 return err;
7168}
7169
7170/**
7171 * err_pos - find the position of a string within a command for error careting
7172 * @cmd: The tracing command that caused the error
7173 * @str: The string to position the caret at within @cmd
7174 *
7175 * Finds the position of the first occurrence of @str within @cmd. The
7176 * return value can be passed to tracing_log_err() for caret placement
7177 * within @cmd.
7178 *
7179 * Returns the index within @cmd of the first occurrence of @str or 0
7180 * if @str was not found.
7181 */
7182unsigned int err_pos(char *cmd, const char *str)
7183{
7184 char *found;
7185
7186 if (WARN_ON(!strlen(cmd)))
7187 return 0;
7188
7189 found = strstr(cmd, str);
7190 if (found)
7191 return found - cmd;
7192
7193 return 0;
7194}
7195
7196/**
7197 * tracing_log_err - write an error to the tracing error log
7198 * @tr: The associated trace array for the error (NULL for top level array)
7199 * @loc: A string describing where the error occurred
7200 * @cmd: The tracing command that caused the error
7201 * @errs: The array of loc-specific static error strings
7202 * @type: The index into errs[], which produces the specific static err string
7203 * @pos: The position the caret should be placed in the cmd
7204 *
7205 * Writes an error into tracing/error_log of the form:
7206 *
7207 * <loc>: error: <text>
7208 * Command: <cmd>
7209 * ^
7210 *
7211 * tracing/error_log is a small log file containing the last
7212 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
7213 * unless there has been a tracing error, and the error log can be
7214 * cleared and have its memory freed by writing the empty string to it
7215 * in truncation mode, i.e. echo > tracing/error_log.
7216 *
7217 * NOTE: the @errs array along with the @type param are used to
7218 * produce a static error string - this string is not copied and saved
7219 * when the error is logged - only a pointer to it is saved. See
7220 * existing callers for examples of how static strings are typically
7221 * defined for use with tracing_log_err().
7222 */
7223void tracing_log_err(struct trace_array *tr,
7224 const char *loc, const char *cmd,
7225 const char **errs, u8 type, u8 pos)
7226{
7227 struct tracing_log_err *err;
7228
7229 if (!tr)
7230 tr = &global_trace;
7231
7232 mutex_lock(&tracing_err_log_lock);
7233 err = get_tracing_log_err(tr);
7234 if (PTR_ERR(err) == -ENOMEM) {
7235 mutex_unlock(&tracing_err_log_lock);
7236 return;
7237 }
7238
7239 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7240	snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7241
7242 err->info.errs = errs;
7243 err->info.type = type;
7244 err->info.pos = pos;
7245 err->info.ts = local_clock();
7246
7247 list_add_tail(&err->list, &tr->err_log);
7248 mutex_unlock(&tracing_err_log_lock);
7249}
7250
7251static void clear_tracing_err_log(struct trace_array *tr)
7252{
7253 struct tracing_log_err *err, *next;
7254
7255 mutex_lock(&tracing_err_log_lock);
7256 list_for_each_entry_safe(err, next, &tr->err_log, list) {
7257 list_del(&err->list);
7258 kfree(err);
7259 }
7260
7261 tr->n_err_log_entries = 0;
7262 mutex_unlock(&tracing_err_log_lock);
7263}
7264
7265static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7266{
7267 struct trace_array *tr = m->private;
7268
7269 mutex_lock(&tracing_err_log_lock);
7270
7271 return seq_list_start(&tr->err_log, *pos);
7272}
7273
7274static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7275{
7276 struct trace_array *tr = m->private;
7277
7278 return seq_list_next(v, &tr->err_log, pos);
7279}
7280
7281static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7282{
7283 mutex_unlock(&tracing_err_log_lock);
7284}
7285
7286static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7287{
7288 u8 i;
7289
7290 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7291 seq_putc(m, ' ');
7292 for (i = 0; i < pos; i++)
7293 seq_putc(m, ' ');
7294 seq_puts(m, "^\n");
7295}
7296
7297static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7298{
7299 struct tracing_log_err *err = v;
7300
7301 if (err) {
7302 const char *err_text = err->info.errs[err->info.type];
7303 u64 sec = err->info.ts;
7304 u32 nsec;
7305
7306 nsec = do_div(sec, NSEC_PER_SEC);
7307 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7308 err->loc, err_text);
7309 seq_printf(m, "%s", err->cmd);
7310 tracing_err_log_show_pos(m, err->info.pos);
7311 }
7312
7313 return 0;
7314}
7315
7316static const struct seq_operations tracing_err_log_seq_ops = {
7317 .start = tracing_err_log_seq_start,
7318 .next = tracing_err_log_seq_next,
7319 .stop = tracing_err_log_seq_stop,
7320 .show = tracing_err_log_seq_show
7321};
7322
7323static int tracing_err_log_open(struct inode *inode, struct file *file)
7324{
7325 struct trace_array *tr = inode->i_private;
7326 int ret = 0;
7327
7328 ret = tracing_check_open_get_tr(tr);
7329 if (ret)
7330 return ret;
7331
7332 /* If this file was opened for write, then erase contents */
7333 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7334 clear_tracing_err_log(tr);
7335
7336 if (file->f_mode & FMODE_READ) {
7337 ret = seq_open(file, &tracing_err_log_seq_ops);
7338 if (!ret) {
7339 struct seq_file *m = file->private_data;
7340 m->private = tr;
7341 } else {
7342 trace_array_put(tr);
7343 }
7344 }
7345 return ret;
7346}
7347
7348static ssize_t tracing_err_log_write(struct file *file,
7349 const char __user *buffer,
7350 size_t count, loff_t *ppos)
7351{
7352 return count;
7353}
7354
7355static int tracing_err_log_release(struct inode *inode, struct file *file)
7356{
7357 struct trace_array *tr = inode->i_private;
7358
7359 trace_array_put(tr);
7360
7361 if (file->f_mode & FMODE_READ)
7362 seq_release(inode, file);
7363
7364 return 0;
7365}
7366
7367static const struct file_operations tracing_err_log_fops = {
7368 .open = tracing_err_log_open,
7369 .write = tracing_err_log_write,
7370 .read = seq_read,
7371 .llseek = tracing_lseek,
7372 .release = tracing_err_log_release,
7373};
7374
7375static int tracing_buffers_open(struct inode *inode, struct file *filp)
7376{
7377 struct trace_array *tr = inode->i_private;
7378 struct ftrace_buffer_info *info;
7379 int ret;
7380
7381 ret = tracing_check_open_get_tr(tr);
7382 if (ret)
7383 return ret;
7384
7385 info = kzalloc(sizeof(*info), GFP_KERNEL);
7386 if (!info) {
7387 trace_array_put(tr);
7388 return -ENOMEM;
7389 }
7390
7391 mutex_lock(&trace_types_lock);
7392
7393 info->iter.tr = tr;
7394 info->iter.cpu_file = tracing_get_cpu(inode);
7395 info->iter.trace = tr->current_trace;
7396 info->iter.trace_buffer = &tr->trace_buffer;
7397 info->spare = NULL;
7398 /* Force reading ring buffer for first read */
7399 info->read = (unsigned int)-1;
7400
7401 filp->private_data = info;
7402
7403 tr->trace_ref++;
7404
7405 mutex_unlock(&trace_types_lock);
7406
7407 ret = nonseekable_open(inode, filp);
7408 if (ret < 0)
7409 trace_array_put(tr);
7410
7411 return ret;
7412}
7413
7414static __poll_t
7415tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7416{
7417 struct ftrace_buffer_info *info = filp->private_data;
7418 struct trace_iterator *iter = &info->iter;
7419
7420 return trace_poll(iter, filp, poll_table);
7421}
7422
7423static ssize_t
7424tracing_buffers_read(struct file *filp, char __user *ubuf,
7425 size_t count, loff_t *ppos)
7426{
7427 struct ftrace_buffer_info *info = filp->private_data;
7428 struct trace_iterator *iter = &info->iter;
7429 ssize_t ret = 0;
7430 ssize_t size;
7431
7432 if (!count)
7433 return 0;
7434
7435#ifdef CONFIG_TRACER_MAX_TRACE
7436 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7437 return -EBUSY;
7438#endif
7439
7440 if (!info->spare) {
7441 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
7442 iter->cpu_file);
7443 if (IS_ERR(info->spare)) {
7444 ret = PTR_ERR(info->spare);
7445 info->spare = NULL;
7446 } else {
7447 info->spare_cpu = iter->cpu_file;
7448 }
7449 }
7450 if (!info->spare)
7451 return ret;
7452
7453	/* Do we have leftover data from a previous read? */
7454 if (info->read < PAGE_SIZE)
7455 goto read;
7456
7457 again:
7458 trace_access_lock(iter->cpu_file);
7459 ret = ring_buffer_read_page(iter->trace_buffer->buffer,
7460 &info->spare,
7461 count,
7462 iter->cpu_file, 0);
7463 trace_access_unlock(iter->cpu_file);
7464
7465 if (ret < 0) {
7466 if (trace_empty(iter)) {
7467 if ((filp->f_flags & O_NONBLOCK))
7468 return -EAGAIN;
7469
7470 ret = wait_on_pipe(iter, 0);
7471 if (ret)
7472 return ret;
7473
7474 goto again;
7475 }
7476 return 0;
7477 }
7478
7479 info->read = 0;
7480 read:
7481 size = PAGE_SIZE - info->read;
7482 if (size > count)
7483 size = count;
7484
7485 ret = copy_to_user(ubuf, info->spare + info->read, size);
7486 if (ret == size)
7487 return -EFAULT;
7488
7489 size -= ret;
7490
7491 *ppos += size;
7492 info->read += size;
7493
7494 return size;
7495}
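/*
 * Usage example: tracing_buffers_read() is the read side of the per-CPU
 * "trace_pipe_raw" files (see tracing_init_tracefs_percpu() below),
 * which hand back whole ring-buffer pages in the kernel's binary format;
 * tools such as trace-cmd parse them using the ring-buffer page layout.
 * An illustrative userspace sketch that just drains raw pages to another
 * file descriptor, assuming tracefs is mounted at /sys/kernel/tracing
 * and a 4K page size:
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	void drain_cpu0_raw(int out_fd)
 *	{
 *		char page[4096];
 *		int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *			      O_RDONLY);
 *		ssize_t r;
 *
 *		if (fd < 0)
 *			return;
 *		while ((r = read(fd, page, sizeof(page))) > 0)
 *			write(out_fd, page, r);
 *		close(fd);
 *	}
 */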
7496
7497static int tracing_buffers_release(struct inode *inode, struct file *file)
7498{
7499 struct ftrace_buffer_info *info = file->private_data;
7500 struct trace_iterator *iter = &info->iter;
7501
7502 mutex_lock(&trace_types_lock);
7503
7504 iter->tr->trace_ref--;
7505
7506 __trace_array_put(iter->tr);
7507
7508 if (info->spare)
7509 ring_buffer_free_read_page(iter->trace_buffer->buffer,
7510 info->spare_cpu, info->spare);
7511 kfree(info);
7512
7513 mutex_unlock(&trace_types_lock);
7514
7515 return 0;
7516}
7517
7518struct buffer_ref {
7519 struct ring_buffer *buffer;
7520 void *page;
7521 int cpu;
7522 refcount_t refcount;
7523};
7524
7525static void buffer_ref_release(struct buffer_ref *ref)
7526{
7527 if (!refcount_dec_and_test(&ref->refcount))
7528 return;
7529 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7530 kfree(ref);
7531}
7532
7533static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7534 struct pipe_buffer *buf)
7535{
7536 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7537
7538 buffer_ref_release(ref);
7539 buf->private = 0;
7540}
7541
7542static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7543 struct pipe_buffer *buf)
7544{
7545 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7546
7547 if (refcount_read(&ref->refcount) > INT_MAX/2)
7548 return false;
7549
7550 refcount_inc(&ref->refcount);
7551 return true;
7552}
7553
7554/* Pipe buffer operations for a buffer. */
7555static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7556 .confirm = generic_pipe_buf_confirm,
7557 .release = buffer_pipe_buf_release,
7558 .steal = generic_pipe_buf_nosteal,
7559 .get = buffer_pipe_buf_get,
7560};
7561
7562/*
7563 * Callback from splice_to_pipe(), used to release some pages at the
7564 * end of the spd in case we errored out while filling the pipe.
7565 */
7566static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7567{
7568 struct buffer_ref *ref =
7569 (struct buffer_ref *)spd->partial[i].private;
7570
7571 buffer_ref_release(ref);
7572 spd->partial[i].private = 0;
7573}
7574
7575static ssize_t
7576tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7577 struct pipe_inode_info *pipe, size_t len,
7578 unsigned int flags)
7579{
7580 struct ftrace_buffer_info *info = file->private_data;
7581 struct trace_iterator *iter = &info->iter;
7582 struct partial_page partial_def[PIPE_DEF_BUFFERS];
7583 struct page *pages_def[PIPE_DEF_BUFFERS];
7584 struct splice_pipe_desc spd = {
7585 .pages = pages_def,
7586 .partial = partial_def,
7587 .nr_pages_max = PIPE_DEF_BUFFERS,
7588 .ops = &buffer_pipe_buf_ops,
7589 .spd_release = buffer_spd_release,
7590 };
7591 struct buffer_ref *ref;
7592 int entries, i;
7593 ssize_t ret = 0;
7594
7595#ifdef CONFIG_TRACER_MAX_TRACE
7596 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7597 return -EBUSY;
7598#endif
7599
7600 if (*ppos & (PAGE_SIZE - 1))
7601 return -EINVAL;
7602
7603 if (len & (PAGE_SIZE - 1)) {
7604 if (len < PAGE_SIZE)
7605 return -EINVAL;
7606 len &= PAGE_MASK;
7607 }
7608
7609 if (splice_grow_spd(pipe, &spd))
7610 return -ENOMEM;
7611
7612 again:
7613 trace_access_lock(iter->cpu_file);
7614 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7615
7616 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7617 struct page *page;
7618 int r;
7619
7620 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7621 if (!ref) {
7622 ret = -ENOMEM;
7623 break;
7624 }
7625
7626 refcount_set(&ref->refcount, 1);
7627 ref->buffer = iter->trace_buffer->buffer;
7628 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7629 if (IS_ERR(ref->page)) {
7630 ret = PTR_ERR(ref->page);
7631 ref->page = NULL;
7632 kfree(ref);
7633 break;
7634 }
7635 ref->cpu = iter->cpu_file;
7636
7637 r = ring_buffer_read_page(ref->buffer, &ref->page,
7638 len, iter->cpu_file, 1);
7639 if (r < 0) {
7640 ring_buffer_free_read_page(ref->buffer, ref->cpu,
7641 ref->page);
7642 kfree(ref);
7643 break;
7644 }
7645
7646 page = virt_to_page(ref->page);
7647
7648 spd.pages[i] = page;
7649 spd.partial[i].len = PAGE_SIZE;
7650 spd.partial[i].offset = 0;
7651 spd.partial[i].private = (unsigned long)ref;
7652 spd.nr_pages++;
7653 *ppos += PAGE_SIZE;
7654
7655 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7656 }
7657
7658 trace_access_unlock(iter->cpu_file);
7659 spd.nr_pages = i;
7660
7661 /* did we read anything? */
7662 if (!spd.nr_pages) {
7663 if (ret)
7664 goto out;
7665
7666 ret = -EAGAIN;
7667 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7668 goto out;
7669
7670 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7671 if (ret)
7672 goto out;
7673
7674 goto again;
7675 }
7676
7677 ret = splice_to_pipe(pipe, &spd);
7678out:
7679 splice_shrink_spd(&spd);
7680
7681 return ret;
7682}
7683
7684static const struct file_operations tracing_buffers_fops = {
7685 .open = tracing_buffers_open,
7686 .read = tracing_buffers_read,
7687 .poll = tracing_buffers_poll,
7688 .release = tracing_buffers_release,
7689 .splice_read = tracing_buffers_splice_read,
7690 .llseek = no_llseek,
7691};
7692
7693static ssize_t
7694tracing_stats_read(struct file *filp, char __user *ubuf,
7695 size_t count, loff_t *ppos)
7696{
7697 struct inode *inode = file_inode(filp);
7698 struct trace_array *tr = inode->i_private;
7699 struct trace_buffer *trace_buf = &tr->trace_buffer;
7700 int cpu = tracing_get_cpu(inode);
7701 struct trace_seq *s;
7702 unsigned long cnt;
7703 unsigned long long t;
7704 unsigned long usec_rem;
7705
7706 s = kmalloc(sizeof(*s), GFP_KERNEL);
7707 if (!s)
7708 return -ENOMEM;
7709
7710 trace_seq_init(s);
7711
7712 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7713 trace_seq_printf(s, "entries: %ld\n", cnt);
7714
7715 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7716 trace_seq_printf(s, "overrun: %ld\n", cnt);
7717
7718 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7719 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7720
7721 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7722 trace_seq_printf(s, "bytes: %ld\n", cnt);
7723
7724 if (trace_clocks[tr->clock_id].in_ns) {
7725 /* local or global for trace_clock */
7726 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7727 usec_rem = do_div(t, USEC_PER_SEC);
7728 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7729 t, usec_rem);
7730
7731 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7732 usec_rem = do_div(t, USEC_PER_SEC);
7733 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7734 } else {
7735 /* counter or tsc mode for trace_clock */
7736 trace_seq_printf(s, "oldest event ts: %llu\n",
7737 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7738
7739 trace_seq_printf(s, "now ts: %llu\n",
7740 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7741 }
7742
7743 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7744 trace_seq_printf(s, "dropped events: %ld\n", cnt);
7745
7746 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7747 trace_seq_printf(s, "read events: %ld\n", cnt);
7748
7749 count = simple_read_from_buffer(ubuf, count, ppos,
7750 s->buffer, trace_seq_used(s));
7751
7752 kfree(s);
7753
7754 return count;
7755}
7756
7757static const struct file_operations tracing_stats_fops = {
7758 .open = tracing_open_generic_tr,
7759 .read = tracing_stats_read,
7760 .llseek = generic_file_llseek,
7761 .release = tracing_release_generic_tr,
7762};
7763
7764#ifdef CONFIG_DYNAMIC_FTRACE
7765
7766static ssize_t
7767tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7768 size_t cnt, loff_t *ppos)
7769{
7770 ssize_t ret;
7771 char *buf;
7772 int r;
7773
7774 /* 256 should be plenty to hold the amount needed */
7775 buf = kmalloc(256, GFP_KERNEL);
7776 if (!buf)
7777 return -ENOMEM;
7778
7779 r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
7780 ftrace_update_tot_cnt,
7781 ftrace_number_of_pages,
7782 ftrace_number_of_groups);
7783
7784 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7785 kfree(buf);
7786 return ret;
7787}
7788
7789static const struct file_operations tracing_dyn_info_fops = {
7790 .open = tracing_open_generic,
7791 .read = tracing_read_dyn_info,
7792 .llseek = generic_file_llseek,
7793};
7794#endif /* CONFIG_DYNAMIC_FTRACE */
7795
7796#if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7797static void
7798ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7799 struct trace_array *tr, struct ftrace_probe_ops *ops,
7800 void *data)
7801{
7802 tracing_snapshot_instance(tr);
7803}
7804
7805static void
7806ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7807 struct trace_array *tr, struct ftrace_probe_ops *ops,
7808 void *data)
7809{
7810 struct ftrace_func_mapper *mapper = data;
7811 long *count = NULL;
7812
7813 if (mapper)
7814 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7815
7816 if (count) {
7817
7818 if (*count <= 0)
7819 return;
7820
7821 (*count)--;
7822 }
7823
7824 tracing_snapshot_instance(tr);
7825}
7826
7827static int
7828ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7829 struct ftrace_probe_ops *ops, void *data)
7830{
7831 struct ftrace_func_mapper *mapper = data;
7832 long *count = NULL;
7833
7834 seq_printf(m, "%ps:", (void *)ip);
7835
7836 seq_puts(m, "snapshot");
7837
7838 if (mapper)
7839 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7840
7841 if (count)
7842 seq_printf(m, ":count=%ld\n", *count);
7843 else
7844 seq_puts(m, ":unlimited\n");
7845
7846 return 0;
7847}
7848
7849static int
7850ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7851 unsigned long ip, void *init_data, void **data)
7852{
7853 struct ftrace_func_mapper *mapper = *data;
7854
7855 if (!mapper) {
7856 mapper = allocate_ftrace_func_mapper();
7857 if (!mapper)
7858 return -ENOMEM;
7859 *data = mapper;
7860 }
7861
7862 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7863}
7864
7865static void
7866ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7867 unsigned long ip, void *data)
7868{
7869 struct ftrace_func_mapper *mapper = data;
7870
7871 if (!ip) {
7872 if (!mapper)
7873 return;
7874 free_ftrace_func_mapper(mapper, NULL);
7875 return;
7876 }
7877
7878 ftrace_func_mapper_remove_ip(mapper, ip);
7879}
7880
7881static struct ftrace_probe_ops snapshot_probe_ops = {
7882 .func = ftrace_snapshot,
7883 .print = ftrace_snapshot_print,
7884};
7885
7886static struct ftrace_probe_ops snapshot_count_probe_ops = {
7887 .func = ftrace_count_snapshot,
7888 .print = ftrace_snapshot_print,
7889 .init = ftrace_snapshot_init,
7890 .free = ftrace_snapshot_free,
7891};
7892
7893static int
7894ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7895 char *glob, char *cmd, char *param, int enable)
7896{
7897 struct ftrace_probe_ops *ops;
7898 void *count = (void *)-1;
7899 char *number;
7900 int ret;
7901
7902 if (!tr)
7903 return -ENODEV;
7904
7905 /* hash funcs only work with set_ftrace_filter */
7906 if (!enable)
7907 return -EINVAL;
7908
7909 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7910
7911 if (glob[0] == '!')
7912 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7913
7914 if (!param)
7915 goto out_reg;
7916
7917 number = strsep(&param, ":");
7918
7919 if (!strlen(number))
7920 goto out_reg;
7921
7922 /*
7923 * We use the callback data field (which is a pointer)
7924 * as our counter.
7925 */
7926 ret = kstrtoul(number, 0, (unsigned long *)&count);
7927 if (ret)
7928 return ret;
7929
7930 out_reg:
7931 ret = tracing_alloc_snapshot_instance(tr);
7932 if (ret < 0)
7933 goto out;
7934
7935 ret = register_ftrace_function_probe(glob, tr, ops, count);
7936
7937 out:
7938 return ret < 0 ? ret : 0;
7939}
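/*
 * Usage example: ftrace_trace_snapshot_callback() implements the
 * "snapshot" command registered below for set_ftrace_filter.
 * Illustrative shell usage, assuming tracefs is mounted at
 * /sys/kernel/tracing and that the chosen function appears in
 * available_filter_functions:
 *
 *	echo 'schedule:snapshot' > /sys/kernel/tracing/set_ftrace_filter
 *						(snapshot on every call)
 *	echo 'schedule:snapshot:5' > /sys/kernel/tracing/set_ftrace_filter
 *						(only the first 5 calls)
 *	echo '!schedule:snapshot' > /sys/kernel/tracing/set_ftrace_filter
 *						(remove the probe again)
 */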
7940
7941static struct ftrace_func_command ftrace_snapshot_cmd = {
7942 .name = "snapshot",
7943 .func = ftrace_trace_snapshot_callback,
7944};
7945
7946static __init int register_snapshot_cmd(void)
7947{
7948 return register_ftrace_command(&ftrace_snapshot_cmd);
7949}
7950#else
7951static inline __init int register_snapshot_cmd(void) { return 0; }
7952#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7953
7954static struct dentry *tracing_get_dentry(struct trace_array *tr)
7955{
7956 if (WARN_ON(!tr->dir))
7957 return ERR_PTR(-ENODEV);
7958
7959 /* Top directory uses NULL as the parent */
7960 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7961 return NULL;
7962
7963 /* All sub buffers have a descriptor */
7964 return tr->dir;
7965}
7966
7967static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7968{
7969 struct dentry *d_tracer;
7970
7971 if (tr->percpu_dir)
7972 return tr->percpu_dir;
7973
7974 d_tracer = tracing_get_dentry(tr);
7975 if (IS_ERR(d_tracer))
7976 return NULL;
7977
7978 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7979
7980 WARN_ONCE(!tr->percpu_dir,
7981 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7982
7983 return tr->percpu_dir;
7984}
7985
7986static struct dentry *
7987trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7988 void *data, long cpu, const struct file_operations *fops)
7989{
7990 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7991
7992 if (ret) /* See tracing_get_cpu() */
7993 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7994 return ret;
7995}
7996
7997static void
7998tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7999{
8000 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8001 struct dentry *d_cpu;
8002 char cpu_dir[30]; /* 30 characters should be more than enough */
8003
8004 if (!d_percpu)
8005 return;
8006
8007 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8008 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8009 if (!d_cpu) {
8010 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8011 return;
8012 }
8013
8014 /* per cpu trace_pipe */
8015 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8016 tr, cpu, &tracing_pipe_fops);
8017
8018 /* per cpu trace */
8019 trace_create_cpu_file("trace", 0644, d_cpu,
8020 tr, cpu, &tracing_fops);
8021
8022 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8023 tr, cpu, &tracing_buffers_fops);
8024
8025 trace_create_cpu_file("stats", 0444, d_cpu,
8026 tr, cpu, &tracing_stats_fops);
8027
8028 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8029 tr, cpu, &tracing_entries_fops);
8030
8031#ifdef CONFIG_TRACER_SNAPSHOT
8032 trace_create_cpu_file("snapshot", 0644, d_cpu,
8033 tr, cpu, &snapshot_fops);
8034
8035 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8036 tr, cpu, &snapshot_raw_fops);
8037#endif
8038}
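/*
 * Layout example: the function above creates a per-CPU directory (cpu0
 * shown) that, with tracefs mounted at /sys/kernel/tracing, looks
 * roughly like:
 *
 *	per_cpu/cpu0/trace_pipe
 *	per_cpu/cpu0/trace
 *	per_cpu/cpu0/trace_pipe_raw
 *	per_cpu/cpu0/stats
 *	per_cpu/cpu0/buffer_size_kb
 *	per_cpu/cpu0/snapshot		(CONFIG_TRACER_SNAPSHOT only)
 *	per_cpu/cpu0/snapshot_raw	(CONFIG_TRACER_SNAPSHOT only)
 *
 * Each file acts only on that CPU's ring buffer; trace, trace_pipe,
 * buffer_size_kb and snapshot also exist at the top level, where they
 * cover all CPUs.
 */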
8039
8040#ifdef CONFIG_FTRACE_SELFTEST
8041/* Let selftest have access to static functions in this file */
8042#include "trace_selftest.c"
8043#endif
8044
8045static ssize_t
8046trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8047 loff_t *ppos)
8048{
8049 struct trace_option_dentry *topt = filp->private_data;
8050 char *buf;
8051
8052 if (topt->flags->val & topt->opt->bit)
8053 buf = "1\n";
8054 else
8055 buf = "0\n";
8056
8057 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8058}
8059
8060static ssize_t
8061trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8062 loff_t *ppos)
8063{
8064 struct trace_option_dentry *topt = filp->private_data;
8065 unsigned long val;
8066 int ret;
8067
8068 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8069 if (ret)
8070 return ret;
8071
8072 if (val != 0 && val != 1)
8073 return -EINVAL;
8074
8075 if (!!(topt->flags->val & topt->opt->bit) != val) {
8076 mutex_lock(&trace_types_lock);
8077 ret = __set_tracer_option(topt->tr, topt->flags,
8078 topt->opt, !val);
8079 mutex_unlock(&trace_types_lock);
8080 if (ret)
8081 return ret;
8082 }
8083
8084 *ppos += cnt;
8085
8086 return cnt;
8087}
8088
8089static int tracing_open_options(struct inode *inode, struct file *filp)
8090{
8091 struct trace_option_dentry *topt = inode->i_private;
8092 int ret;
8093
8094 ret = tracing_check_open_get_tr(topt->tr);
8095 if (ret)
8096 return ret;
8097
8098 filp->private_data = inode->i_private;
8099 return 0;
8100}
8101
8102static int tracing_release_options(struct inode *inode, struct file *file)
8103{
8104 struct trace_option_dentry *topt = file->private_data;
8105
8106 trace_array_put(topt->tr);
8107 return 0;
8108}
8109
8110static const struct file_operations trace_options_fops = {
8111 .open = tracing_open_options,
8112 .read = trace_options_read,
8113 .write = trace_options_write,
8114 .llseek = generic_file_llseek,
8115 .release = tracing_release_options,
8116};
8117
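/*
 * The files backed by trace_options_fops live in the per-instance options/
 * directory and toggle tracer-specific flags (struct tracer_opt). Only "0"
 * and "1" are accepted; anything else fails with -EINVAL in
 * trace_options_write() above. Illustrative usage (a sketch, assuming the
 * function_graph tracer is current and tracefs is mounted at
 * /sys/kernel/tracing):
 *
 *   # echo function_graph > /sys/kernel/tracing/current_tracer
 *   # echo 0 > /sys/kernel/tracing/options/funcgraph-irqs
 */
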
8118/*
8119 * In order to pass in both the trace_array descriptor as well as the index
8120 * to the flag that the trace option file represents, the trace_array
8121 * has a character array of trace_flags_index[], which holds the index
8122 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8123 * The address of this character array is passed to the flag option file
8124 * read/write callbacks.
8125 *
8126 * In order to extract both the index and the trace_array descriptor,
8127 * get_tr_index() uses the following algorithm.
8128 *
8129 * idx = *ptr;
8130 *
8131 * The pointer points at an element of the index array whose value
8132 * equals its own position (remember index[1] == 1).
8133 *
8134 * Then, to get the trace_array descriptor, subtracting that index
8135 * from the ptr gets us back to the start of the array itself.
8136 *
8137 * ptr - idx == &index[0]
8138 *
8139 * Then a simple container_of() from that pointer gets us to the
8140 * trace_array descriptor.
8141 */
8142static void get_tr_index(void *data, struct trace_array **ptr,
8143 unsigned int *pindex)
8144{
8145 *pindex = *(unsigned char *)data;
8146
8147 *ptr = container_of(data - *pindex, struct trace_array,
8148 trace_flags_index);
8149}
8150
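/*
 * A worked example of the trick described above (addresses illustrative):
 * if tr->trace_flags_index[] starts at address A, the core option file for
 * flag bit 5 is created with data == A + 5, and trace_flags_index[5] == 5.
 * Then in get_tr_index():
 *
 *   idx = *(unsigned char *)data;           -> 5
 *   data - idx                              -> A == &trace_flags_index[0]
 *   container_of(A, struct trace_array,
 *                trace_flags_index)         -> tr
 */
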
8151static ssize_t
8152trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8153 loff_t *ppos)
8154{
8155 void *tr_index = filp->private_data;
8156 struct trace_array *tr;
8157 unsigned int index;
8158 char *buf;
8159
8160 get_tr_index(tr_index, &tr, &index);
8161
8162 if (tr->trace_flags & (1 << index))
8163 buf = "1\n";
8164 else
8165 buf = "0\n";
8166
8167 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8168}
8169
8170static ssize_t
8171trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8172 loff_t *ppos)
8173{
8174 void *tr_index = filp->private_data;
8175 struct trace_array *tr;
8176 unsigned int index;
8177 unsigned long val;
8178 int ret;
8179
8180 get_tr_index(tr_index, &tr, &index);
8181
8182 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8183 if (ret)
8184 return ret;
8185
8186 if (val != 0 && val != 1)
8187 return -EINVAL;
8188
8189 mutex_lock(&event_mutex);
8190 mutex_lock(&trace_types_lock);
8191 ret = set_tracer_flag(tr, 1 << index, val);
8192 mutex_unlock(&trace_types_lock);
8193 mutex_unlock(&event_mutex);
8194
8195 if (ret < 0)
8196 return ret;
8197
8198 *ppos += cnt;
8199
8200 return cnt;
8201}
8202
8203static const struct file_operations trace_options_core_fops = {
8204 .open = tracing_open_generic,
8205 .read = trace_options_core_read,
8206 .write = trace_options_core_write,
8207 .llseek = generic_file_llseek,
8208};
8209
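/*
 * The core option files share trace_options_core_fops and map directly onto
 * bits of tr->trace_flags. Illustrative usage (a sketch, assuming tracefs is
 * mounted at /sys/kernel/tracing):
 *
 *   # echo 1 > /sys/kernel/tracing/options/sym-offset
 *   # cat /sys/kernel/tracing/options/sym-offset
 *   1
 */
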
8210struct dentry *trace_create_file(const char *name,
8211 umode_t mode,
8212 struct dentry *parent,
8213 void *data,
8214 const struct file_operations *fops)
8215{
8216 struct dentry *ret;
8217
8218 ret = tracefs_create_file(name, mode, parent, data, fops);
8219 if (!ret)
8220 pr_warn("Could not create tracefs '%s' entry\n", name);
8221
8222 return ret;
8223}
8224
8225
8226static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8227{
8228 struct dentry *d_tracer;
8229
8230 if (tr->options)
8231 return tr->options;
8232
8233 d_tracer = tracing_get_dentry(tr);
8234 if (IS_ERR(d_tracer))
8235 return NULL;
8236
8237 tr->options = tracefs_create_dir("options", d_tracer);
8238 if (!tr->options) {
8239 pr_warn("Could not create tracefs directory 'options'\n");
8240 return NULL;
8241 }
8242
8243 return tr->options;
8244}
8245
8246static void
8247create_trace_option_file(struct trace_array *tr,
8248 struct trace_option_dentry *topt,
8249 struct tracer_flags *flags,
8250 struct tracer_opt *opt)
8251{
8252 struct dentry *t_options;
8253
8254 t_options = trace_options_init_dentry(tr);
8255 if (!t_options)
8256 return;
8257
8258 topt->flags = flags;
8259 topt->opt = opt;
8260 topt->tr = tr;
8261
8262 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8263 &trace_options_fops);
8264
8265}
8266
8267static void
8268create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8269{
8270 struct trace_option_dentry *topts;
8271 struct trace_options *tr_topts;
8272 struct tracer_flags *flags;
8273 struct tracer_opt *opts;
8274 int cnt;
8275 int i;
8276
8277 if (!tracer)
8278 return;
8279
8280 flags = tracer->flags;
8281
8282 if (!flags || !flags->opts)
8283 return;
8284
8285 /*
8286 * If this is an instance, only create flags for tracers
8287 * the instance may have.
8288 */
8289 if (!trace_ok_for_array(tracer, tr))
8290 return;
8291
8292 for (i = 0; i < tr->nr_topts; i++) {
8293 /* Make sure there are no duplicate flags. */
8294 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8295 return;
8296 }
8297
8298 opts = flags->opts;
8299
8300 for (cnt = 0; opts[cnt].name; cnt++)
8301 ;
8302
8303 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8304 if (!topts)
8305 return;
8306
8307 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8308 GFP_KERNEL);
8309 if (!tr_topts) {
8310 kfree(topts);
8311 return;
8312 }
8313
8314 tr->topts = tr_topts;
8315 tr->topts[tr->nr_topts].tracer = tracer;
8316 tr->topts[tr->nr_topts].topts = topts;
8317 tr->nr_topts++;
8318
8319 for (cnt = 0; opts[cnt].name; cnt++) {
8320 create_trace_option_file(tr, &topts[cnt], flags,
8321 &opts[cnt]);
8322 WARN_ONCE(topts[cnt].entry == NULL,
8323 "Failed to create trace option: %s",
8324 opts[cnt].name);
8325 }
8326}
8327
8328static struct dentry *
8329create_trace_option_core_file(struct trace_array *tr,
8330 const char *option, long index)
8331{
8332 struct dentry *t_options;
8333
8334 t_options = trace_options_init_dentry(tr);
8335 if (!t_options)
8336 return NULL;
8337
8338 return trace_create_file(option, 0644, t_options,
8339 (void *)&tr->trace_flags_index[index],
8340 &trace_options_core_fops);
8341}
8342
8343static void create_trace_options_dir(struct trace_array *tr)
8344{
8345 struct dentry *t_options;
8346 bool top_level = tr == &global_trace;
8347 int i;
8348
8349 t_options = trace_options_init_dentry(tr);
8350 if (!t_options)
8351 return;
8352
8353 for (i = 0; trace_options[i]; i++) {
8354 if (top_level ||
8355 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8356 create_trace_option_core_file(tr, trace_options[i], i);
8357 }
8358}
8359
8360static ssize_t
8361rb_simple_read(struct file *filp, char __user *ubuf,
8362 size_t cnt, loff_t *ppos)
8363{
8364 struct trace_array *tr = filp->private_data;
8365 char buf[64];
8366 int r;
8367
8368 r = tracer_tracing_is_on(tr);
8369 r = sprintf(buf, "%d\n", r);
8370
8371 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8372}
8373
8374static ssize_t
8375rb_simple_write(struct file *filp, const char __user *ubuf,
8376 size_t cnt, loff_t *ppos)
8377{
8378 struct trace_array *tr = filp->private_data;
8379 struct ring_buffer *buffer = tr->trace_buffer.buffer;
8380 unsigned long val;
8381 int ret;
8382
8383 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8384 if (ret)
8385 return ret;
8386
8387 if (buffer) {
8388 mutex_lock(&trace_types_lock);
8389 if (!!val == tracer_tracing_is_on(tr)) {
8390 val = 0; /* do nothing */
8391 } else if (val) {
8392 tracer_tracing_on(tr);
8393 if (tr->current_trace->start)
8394 tr->current_trace->start(tr);
8395 } else {
8396 tracer_tracing_off(tr);
8397 if (tr->current_trace->stop)
8398 tr->current_trace->stop(tr);
8399 }
8400 mutex_unlock(&trace_types_lock);
8401 }
8402
8403 (*ppos)++;
8404
8405 return cnt;
8406}
8407
8408static const struct file_operations rb_simple_fops = {
8409 .open = tracing_open_generic_tr,
8410 .read = rb_simple_read,
8411 .write = rb_simple_write,
8412 .release = tracing_release_generic_tr,
8413 .llseek = default_llseek,
8414};
8415
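/*
 * rb_simple_fops backs the per-instance "tracing_on" file created in
 * init_tracer_tracefs(). Writing it starts or stops recording into the ring
 * buffer without freeing it, so a trace can be frozen and inspected.
 * Illustrative usage (a sketch):
 *
 *   # echo 0 > /sys/kernel/tracing/tracing_on   # stop recording, keep data
 *   # cat /sys/kernel/tracing/trace             # inspect the frozen buffer
 *   # echo 1 > /sys/kernel/tracing/tracing_on   # resume recording
 */
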
8416static ssize_t
8417buffer_percent_read(struct file *filp, char __user *ubuf,
8418 size_t cnt, loff_t *ppos)
8419{
8420 struct trace_array *tr = filp->private_data;
8421 char buf[64];
8422 int r;
8423
8424 r = tr->buffer_percent;
8425 r = sprintf(buf, "%d\n", r);
8426
8427 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8428}
8429
8430static ssize_t
8431buffer_percent_write(struct file *filp, const char __user *ubuf,
8432 size_t cnt, loff_t *ppos)
8433{
8434 struct trace_array *tr = filp->private_data;
8435 unsigned long val;
8436 int ret;
8437
8438 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8439 if (ret)
8440 return ret;
8441
8442 if (val > 100)
8443 return -EINVAL;
8444
8445 tr->buffer_percent = val;
8446
8447 (*ppos)++;
8448
8449 return cnt;
8450}
8451
8452static const struct file_operations buffer_percent_fops = {
8453 .open = tracing_open_generic_tr,
8454 .read = buffer_percent_read,
8455 .write = buffer_percent_write,
8456 .release = tracing_release_generic_tr,
8457 .llseek = default_llseek,
8458};
8459
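/*
 * buffer_percent_fops backs the "buffer_percent" file: the value (0-100) is
 * the fill level at which blocked readers of the ring buffer are woken.
 * 0 wakes a reader as soon as any data is present, 100 only when the buffer
 * is full; the default set in init_tracer_tracefs() below is 50.
 * Illustrative usage (a sketch):
 *
 *   # echo 0 > /sys/kernel/tracing/buffer_percent   # wake readers immediately
 */
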
8460static struct dentry *trace_instance_dir;
8461
8462static void
8463init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8464
8465static int
8466allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
8467{
8468 enum ring_buffer_flags rb_flags;
8469
8470 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8471
8472 buf->tr = tr;
8473
8474 buf->buffer = ring_buffer_alloc(size, rb_flags);
8475 if (!buf->buffer)
8476 return -ENOMEM;
8477
8478 buf->data = alloc_percpu(struct trace_array_cpu);
8479 if (!buf->data) {
8480 ring_buffer_free(buf->buffer);
8481 buf->buffer = NULL;
8482 return -ENOMEM;
8483 }
8484
8485 /* Allocate the first page for all buffers */
8486 set_buffer_entries(&tr->trace_buffer,
8487 ring_buffer_size(tr->trace_buffer.buffer, 0));
8488
8489 return 0;
8490}
8491
8492static int allocate_trace_buffers(struct trace_array *tr, int size)
8493{
8494 int ret;
8495
8496 ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
8497 if (ret)
8498 return ret;
8499
8500#ifdef CONFIG_TRACER_MAX_TRACE
8501 ret = allocate_trace_buffer(tr, &tr->max_buffer,
8502 allocate_snapshot ? size : 1);
8503 if (WARN_ON(ret)) {
8504 ring_buffer_free(tr->trace_buffer.buffer);
8505 tr->trace_buffer.buffer = NULL;
8506 free_percpu(tr->trace_buffer.data);
8507 tr->trace_buffer.data = NULL;
8508 return -ENOMEM;
8509 }
8510 tr->allocated_snapshot = allocate_snapshot;
8511
8512 /*
8513 * Only the top level trace array gets its snapshot allocated
8514 * from the kernel command line.
8515 */
8516 allocate_snapshot = false;
8517#endif
8518
8519 /*
8520 * Because of some magic with the way alloc_percpu() works on
8521 * x86_64, we need to synchronize the pgd of all the tables,
8522 * otherwise the trace events that happen in x86_64 page fault
8523 * handlers can't cope with the chance that the alloc_percpu()'d
8524 * memory they touch is not yet mapped and faults recursively.
8525 * We also need to audit all other alloc_percpu() and vmalloc()
8526 * calls in tracing, because something might get triggered within a
8527 * page fault trace event!
8528 */
8529 vmalloc_sync_mappings();
8530
8531 return 0;
8532}
8533
8534static void free_trace_buffer(struct trace_buffer *buf)
8535{
8536 if (buf->buffer) {
8537 ring_buffer_free(buf->buffer);
8538 buf->buffer = NULL;
8539 free_percpu(buf->data);
8540 buf->data = NULL;
8541 }
8542}
8543
8544static void free_trace_buffers(struct trace_array *tr)
8545{
8546 if (!tr)
8547 return;
8548
8549 free_trace_buffer(&tr->trace_buffer);
8550
8551#ifdef CONFIG_TRACER_MAX_TRACE
8552 free_trace_buffer(&tr->max_buffer);
8553#endif
8554}
8555
8556static void init_trace_flags_index(struct trace_array *tr)
8557{
8558 int i;
8559
8560 /* Used by the trace options files */
8561 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8562 tr->trace_flags_index[i] = i;
8563}
8564
8565static void __update_tracer_options(struct trace_array *tr)
8566{
8567 struct tracer *t;
8568
8569 for (t = trace_types; t; t = t->next)
8570 add_tracer_options(tr, t);
8571}
8572
8573static void update_tracer_options(struct trace_array *tr)
8574{
8575 mutex_lock(&trace_types_lock);
8576 tracer_options_updated = true;
8577 __update_tracer_options(tr);
8578 mutex_unlock(&trace_types_lock);
8579}
8580
8581struct trace_array *trace_array_create(const char *name)
8582{
8583 struct trace_array *tr;
8584 int ret;
8585
8586 mutex_lock(&event_mutex);
8587 mutex_lock(&trace_types_lock);
8588
8589 ret = -EEXIST;
8590 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8591 if (tr->name && strcmp(tr->name, name) == 0)
8592 goto out_unlock;
8593 }
8594
8595 ret = -ENOMEM;
8596 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8597 if (!tr)
8598 goto out_unlock;
8599
8600 tr->name = kstrdup(name, GFP_KERNEL);
8601 if (!tr->name)
8602 goto out_free_tr;
8603
8604 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8605 goto out_free_tr;
8606
8607 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8608
8609 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8610
8611 raw_spin_lock_init(&tr->start_lock);
8612
8613 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8614
8615 tr->current_trace = &nop_trace;
8616
8617 INIT_LIST_HEAD(&tr->systems);
8618 INIT_LIST_HEAD(&tr->events);
8619 INIT_LIST_HEAD(&tr->hist_vars);
8620 INIT_LIST_HEAD(&tr->err_log);
8621
8622 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8623 goto out_free_tr;
8624
8625 tr->dir = tracefs_create_dir(name, trace_instance_dir);
8626 if (!tr->dir)
8627 goto out_free_tr;
8628
8629 ret = event_trace_add_tracer(tr->dir, tr);
8630 if (ret) {
8631 tracefs_remove_recursive(tr->dir);
8632 goto out_free_tr;
8633 }
8634
8635 ftrace_init_trace_array(tr);
8636
8637 init_tracer_tracefs(tr, tr->dir);
8638 init_trace_flags_index(tr);
8639 __update_tracer_options(tr);
8640
8641 list_add(&tr->list, &ftrace_trace_arrays);
8642
8643 mutex_unlock(&trace_types_lock);
8644 mutex_unlock(&event_mutex);
8645
8646 return tr;
8647
8648 out_free_tr:
8649 free_trace_buffers(tr);
8650 free_cpumask_var(tr->tracing_cpumask);
8651 kfree(tr->name);
8652 kfree(tr);
8653
8654 out_unlock:
8655 mutex_unlock(&trace_types_lock);
8656 mutex_unlock(&event_mutex);
8657
8658 return ERR_PTR(ret);
8659}
8660EXPORT_SYMBOL_GPL(trace_array_create);
8661
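/*
 * trace_array_create() is exported so kernel code can get a private tracing
 * instance instead of sharing the top-level buffer. A minimal sketch of how
 * a caller might use it (the instance name is illustrative):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_create("my_driver");
 *	if (IS_ERR(tr))
 *		return PTR_ERR(tr);
 *	...				(write events into the instance)
 *	trace_array_destroy(tr);
 *
 * The instance also appears as /sys/kernel/tracing/instances/my_driver.
 */
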
8662static int instance_mkdir(const char *name)
8663{
8664 return PTR_ERR_OR_ZERO(trace_array_create(name));
8665}
8666
8667static int __remove_instance(struct trace_array *tr)
8668{
8669 int i;
8670
8671 if (tr->ref || (tr->current_trace && tr->trace_ref))
8672 return -EBUSY;
8673
8674 list_del(&tr->list);
8675
8676 /* Disable all the flags that were enabled coming in */
8677 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8678 if ((1 << i) & ZEROED_TRACE_FLAGS)
8679 set_tracer_flag(tr, 1 << i, 0);
8680 }
8681
8682 tracing_set_nop(tr);
8683 clear_ftrace_function_probes(tr);
8684 event_trace_del_tracer(tr);
8685 ftrace_clear_pids(tr);
8686 ftrace_destroy_function_files(tr);
8687 tracefs_remove_recursive(tr->dir);
8688 free_trace_buffers(tr);
8689 clear_tracing_err_log(tr);
8690
8691 for (i = 0; i < tr->nr_topts; i++) {
8692 kfree(tr->topts[i].topts);
8693 }
8694 kfree(tr->topts);
8695
8696 free_cpumask_var(tr->tracing_cpumask);
8697 kfree(tr->name);
8698 kfree(tr);
8699 tr = NULL;
8700
8701 return 0;
8702}
8703
8704int trace_array_destroy(struct trace_array *this_tr)
8705{
8706 struct trace_array *tr;
8707 int ret;
8708
8709 if (!this_tr)
8710 return -EINVAL;
8711
8712 mutex_lock(&event_mutex);
8713 mutex_lock(&trace_types_lock);
8714
8715 ret = -ENODEV;
8716
8717 /* Make sure the trace array exists before destroying it. */
8718 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8719 if (tr == this_tr) {
8720 ret = __remove_instance(tr);
8721 break;
8722 }
8723 }
8724
8725 mutex_unlock(&trace_types_lock);
8726 mutex_unlock(&event_mutex);
8727
8728 return ret;
8729}
8730EXPORT_SYMBOL_GPL(trace_array_destroy);
8731
8732static int instance_rmdir(const char *name)
8733{
8734 struct trace_array *tr;
8735 int ret;
8736
8737 mutex_lock(&event_mutex);
8738 mutex_lock(&trace_types_lock);
8739
8740 ret = -ENODEV;
8741 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8742 if (tr->name && strcmp(tr->name, name) == 0) {
8743 ret = __remove_instance(tr);
8744 break;
8745 }
8746 }
8747
8748 mutex_unlock(&trace_types_lock);
8749 mutex_unlock(&event_mutex);
8750
8751 return ret;
8752}
8753
8754static __init void create_trace_instances(struct dentry *d_tracer)
8755{
8756 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8757 instance_mkdir,
8758 instance_rmdir);
8759 if (WARN_ON(!trace_instance_dir))
8760 return;
8761}
8762
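/*
 * create_trace_instances() wires mkdir/rmdir in the "instances" directory to
 * instance_mkdir()/instance_rmdir() above, so new trace arrays can be made
 * from user space. Illustrative usage (a sketch):
 *
 *   # mkdir /sys/kernel/tracing/instances/foo
 *   # echo 1 > /sys/kernel/tracing/instances/foo/events/sched/sched_switch/enable
 *   # cat /sys/kernel/tracing/instances/foo/trace
 *   # rmdir /sys/kernel/tracing/instances/foo
 *
 * The rmdir fails with -EBUSY while the instance is still referenced (see
 * __remove_instance()).
 */
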
8763static void
8764init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8765{
8766 struct trace_event_file *file;
8767 int cpu;
8768
8769 trace_create_file("available_tracers", 0444, d_tracer,
8770 tr, &show_traces_fops);
8771
8772 trace_create_file("current_tracer", 0644, d_tracer,
8773 tr, &set_tracer_fops);
8774
8775 trace_create_file("tracing_cpumask", 0644, d_tracer,
8776 tr, &tracing_cpumask_fops);
8777
8778 trace_create_file("trace_options", 0644, d_tracer,
8779 tr, &tracing_iter_fops);
8780
8781 trace_create_file("trace", 0644, d_tracer,
8782 tr, &tracing_fops);
8783
8784 trace_create_file("trace_pipe", 0444, d_tracer,
8785 tr, &tracing_pipe_fops);
8786
8787 trace_create_file("buffer_size_kb", 0644, d_tracer,
8788 tr, &tracing_entries_fops);
8789
8790 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8791 tr, &tracing_total_entries_fops);
8792
8793 trace_create_file("free_buffer", 0200, d_tracer,
8794 tr, &tracing_free_buffer_fops);
8795
8796 trace_create_file("trace_marker", 0220, d_tracer,
8797 tr, &tracing_mark_fops);
8798
8799 file = __find_event_file(tr, "ftrace", "print");
8800 if (file && file->dir)
8801 trace_create_file("trigger", 0644, file->dir, file,
8802 &event_trigger_fops);
8803 tr->trace_marker_file = file;
8804
8805 trace_create_file("trace_marker_raw", 0220, d_tracer,
8806 tr, &tracing_mark_raw_fops);
8807
8808 trace_create_file("trace_clock", 0644, d_tracer, tr,
8809 &trace_clock_fops);
8810
8811 trace_create_file("tracing_on", 0644, d_tracer,
8812 tr, &rb_simple_fops);
8813
8814 trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8815 &trace_time_stamp_mode_fops);
8816
8817 tr->buffer_percent = 50;
8818
8819 trace_create_file("buffer_percent", 0444, d_tracer,
8820 tr, &buffer_percent_fops);
8821
8822 create_trace_options_dir(tr);
8823
8824#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8825 trace_create_file("tracing_max_latency", 0644, d_tracer,
8826 &tr->max_latency, &tracing_max_lat_fops);
8827#endif
8828
8829 if (ftrace_create_function_files(tr, d_tracer))
8830 WARN(1, "Could not allocate function filter files");
8831
8832#ifdef CONFIG_TRACER_SNAPSHOT
8833 trace_create_file("snapshot", 0644, d_tracer,
8834 tr, &snapshot_fops);
8835#endif
8836
8837 trace_create_file("error_log", 0644, d_tracer,
8838 tr, &tracing_err_log_fops);
8839
8840 for_each_tracing_cpu(cpu)
8841 tracing_init_tracefs_percpu(tr, cpu);
8842
8843 ftrace_init_tracefs(tr, d_tracer);
8844}
8845
8846static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
8847{
8848 struct vfsmount *mnt;
8849 struct file_system_type *type;
8850
8851 /*
8852 * To maintain backward compatibility for tools that mount
8853 * debugfs to get to the tracing facility, tracefs is automatically
8854 * mounted to the debugfs/tracing directory.
8855 */
8856 type = get_fs_type("tracefs");
8857 if (!type)
8858 return NULL;
8859 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8860 put_filesystem(type);
8861 if (IS_ERR(mnt))
8862 return NULL;
8863 mntget(mnt);
8864
8865 return mnt;
8866}
8867
8868/**
8869 * tracing_init_dentry - initialize top level trace array
8870 *
8871 * This is called when creating files or directories in the tracing
8872 * directory. It is called via fs_initcall() from the boot-up code
8873 * and is expected to return the dentry of the top level tracing directory.
8874 */
8875struct dentry *tracing_init_dentry(void)
8876{
8877 struct trace_array *tr = &global_trace;
8878
8879 if (security_locked_down(LOCKDOWN_TRACEFS)) {
8880 pr_warning("Tracing disabled due to lockdown\n");
8881 return ERR_PTR(-EPERM);
8882 }
8883
8884 /* The top level trace array uses NULL as parent */
8885 if (tr->dir)
8886 return NULL;
8887
8888 if (WARN_ON(!tracefs_initialized()) ||
8889 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8890 WARN_ON(!debugfs_initialized())))
8891 return ERR_PTR(-ENODEV);
8892
8893 /*
8894 * As there may still be users that expect the tracing
8895 * files to exist in debugfs/tracing, we must automount
8896 * the tracefs file system there, so older tools still
8897 * work with the newer kernel.
8898 */
8899 tr->dir = debugfs_create_automount("tracing", NULL,
8900 trace_automount, NULL);
8901
8902 return NULL;
8903}
8904
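/*
 * With the automount set up above, simply walking into the old debugfs path
 * is enough to get tracefs mounted there for legacy tools (a sketch, assuming
 * debugfs is mounted at /sys/kernel/debug):
 *
 *   # ls /sys/kernel/debug/tracing     # triggers trace_automount()
 */
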
8905extern struct trace_eval_map *__start_ftrace_eval_maps[];
8906extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8907
8908static void __init trace_eval_init(void)
8909{
8910 int len;
8911
8912 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8913 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8914}
8915
8916#ifdef CONFIG_MODULES
8917static void trace_module_add_evals(struct module *mod)
8918{
8919 if (!mod->num_trace_evals)
8920 return;
8921
8922 /*
8923 * Modules with bad taint do not have events created, do
8924 * not bother with enums either.
8925 */
8926 if (trace_module_has_bad_taint(mod))
8927 return;
8928
8929 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8930}
8931
8932#ifdef CONFIG_TRACE_EVAL_MAP_FILE
8933static void trace_module_remove_evals(struct module *mod)
8934{
8935 union trace_eval_map_item *map;
8936 union trace_eval_map_item **last = &trace_eval_maps;
8937
8938 if (!mod->num_trace_evals)
8939 return;
8940
8941 mutex_lock(&trace_eval_mutex);
8942
8943 map = trace_eval_maps;
8944
8945 while (map) {
8946 if (map->head.mod == mod)
8947 break;
8948 map = trace_eval_jmp_to_tail(map);
8949 last = &map->tail.next;
8950 map = map->tail.next;
8951 }
8952 if (!map)
8953 goto out;
8954
8955 *last = trace_eval_jmp_to_tail(map)->tail.next;
8956 kfree(map);
8957 out:
8958 mutex_unlock(&trace_eval_mutex);
8959}
8960#else
8961static inline void trace_module_remove_evals(struct module *mod) { }
8962#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8963
8964static int trace_module_notify(struct notifier_block *self,
8965 unsigned long val, void *data)
8966{
8967 struct module *mod = data;
8968
8969 switch (val) {
8970 case MODULE_STATE_COMING:
8971 trace_module_add_evals(mod);
8972 break;
8973 case MODULE_STATE_GOING:
8974 trace_module_remove_evals(mod);
8975 break;
8976 }
8977
8978 return 0;
8979}
8980
8981static struct notifier_block trace_module_nb = {
8982 .notifier_call = trace_module_notify,
8983 .priority = 0,
8984};
8985#endif /* CONFIG_MODULES */
8986
8987static __init int tracer_init_tracefs(void)
8988{
8989 struct dentry *d_tracer;
8990
8991 trace_access_lock_init();
8992
8993 d_tracer = tracing_init_dentry();
8994 if (IS_ERR(d_tracer))
8995 return 0;
8996
8997 event_trace_init();
8998
8999 init_tracer_tracefs(&global_trace, d_tracer);
9000 ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
9001
9002 trace_create_file("tracing_thresh", 0644, d_tracer,
9003 &global_trace, &tracing_thresh_fops);
9004
9005 trace_create_file("README", 0444, d_tracer,
9006 NULL, &tracing_readme_fops);
9007
9008 trace_create_file("saved_cmdlines", 0444, d_tracer,
9009 NULL, &tracing_saved_cmdlines_fops);
9010
9011 trace_create_file("saved_cmdlines_size", 0644, d_tracer,
9012 NULL, &tracing_saved_cmdlines_size_fops);
9013
9014 trace_create_file("saved_tgids", 0444, d_tracer,
9015 NULL, &tracing_saved_tgids_fops);
9016
9017 trace_eval_init();
9018
9019 trace_create_eval_file(d_tracer);
9020
9021#ifdef CONFIG_MODULES
9022 register_module_notifier(&trace_module_nb);
9023#endif
9024
9025#ifdef CONFIG_DYNAMIC_FTRACE
9026 trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
9027 NULL, &tracing_dyn_info_fops);
9028#endif
9029
9030 create_trace_instances(d_tracer);
9031
9032 update_tracer_options(&global_trace);
9033
9034 return 0;
9035}
9036
9037static int trace_panic_handler(struct notifier_block *this,
9038 unsigned long event, void *unused)
9039{
9040 if (ftrace_dump_on_oops)
9041 ftrace_dump(ftrace_dump_on_oops);
9042 return NOTIFY_OK;
9043}
9044
9045static struct notifier_block trace_panic_notifier = {
9046 .notifier_call = trace_panic_handler,
9047 .next = NULL,
9048 .priority = 150 /* priority: INT_MAX >= x >= 0 */
9049};
9050
9051static int trace_die_handler(struct notifier_block *self,
9052 unsigned long val,
9053 void *data)
9054{
9055 switch (val) {
9056 case DIE_OOPS:
9057 if (ftrace_dump_on_oops)
9058 ftrace_dump(ftrace_dump_on_oops);
9059 break;
9060 default:
9061 break;
9062 }
9063 return NOTIFY_OK;
9064}
9065
9066static struct notifier_block trace_die_notifier = {
9067 .notifier_call = trace_die_handler,
9068 .priority = 200
9069};
9070
9071/*
9072 * printk is set to max of 1024, we really don't need it that big.
9073 * Nothing should be printing 1000 characters anyway.
9074 */
9075#define TRACE_MAX_PRINT 1000
9076
9077/*
9078 * Define here KERN_TRACE so that we have one place to modify
9079 * it if we decide to change what log level the ftrace dump
9080 * should be at.
9081 */
9082#define KERN_TRACE KERN_EMERG
9083
9084void
9085trace_printk_seq(struct trace_seq *s)
9086{
9087 /* Probably should print a warning here. */
9088 if (s->seq.len >= TRACE_MAX_PRINT)
9089 s->seq.len = TRACE_MAX_PRINT;
9090
9091 /*
9092 * More paranoid code. Although the buffer size is set to
9093 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9094 * an extra layer of protection.
9095 */
9096 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9097 s->seq.len = s->seq.size - 1;
9098
9099 /* should be zero ended, but we are paranoid. */
9100 s->buffer[s->seq.len] = 0;
9101
9102 printk(KERN_TRACE "%s", s->buffer);
9103
9104 trace_seq_init(s);
9105}
9106
9107void trace_init_global_iter(struct trace_iterator *iter)
9108{
9109 iter->tr = &global_trace;
9110 iter->trace = iter->tr->current_trace;
9111 iter->cpu_file = RING_BUFFER_ALL_CPUS;
9112 iter->trace_buffer = &global_trace.trace_buffer;
9113
9114 if (iter->trace && iter->trace->open)
9115 iter->trace->open(iter);
9116
9117 /* Annotate start of buffers if we had overruns */
9118 if (ring_buffer_overruns(iter->trace_buffer->buffer))
9119 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9120
9121 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9122 if (trace_clocks[iter->tr->clock_id].in_ns)
9123 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9124}
9125
9126void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9127{
9128 /* use static because iter can be a bit big for the stack */
9129 static struct trace_iterator iter;
9130 static atomic_t dump_running;
9131 struct trace_array *tr = &global_trace;
9132 unsigned int old_userobj;
9133 unsigned long flags;
9134 int cnt = 0, cpu;
9135
9136 /* Only allow one dump user at a time. */
9137 if (atomic_inc_return(&dump_running) != 1) {
9138 atomic_dec(&dump_running);
9139 return;
9140 }
9141
9142 /*
9143 * Always turn off tracing when we dump.
9144 * We don't need to show trace output of what happens
9145 * between multiple crashes.
9146 *
9147 * If the user does a sysrq-z, then they can re-enable
9148 * tracing with echo 1 > tracing_on.
9149 */
9150 tracing_off();
9151
9152 local_irq_save(flags);
9153 printk_nmi_direct_enter();
9154
9155 /* Simulate the iterator */
9156 trace_init_global_iter(&iter);
9157
9158 for_each_tracing_cpu(cpu) {
9159 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
9160 }
9161
9162 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9163
9164 /* don't look at user memory in panic mode */
9165 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9166
9167 switch (oops_dump_mode) {
9168 case DUMP_ALL:
9169 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9170 break;
9171 case DUMP_ORIG:
9172 iter.cpu_file = raw_smp_processor_id();
9173 break;
9174 case DUMP_NONE:
9175 goto out_enable;
9176 default:
9177 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9178 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9179 }
9180
9181 printk(KERN_TRACE "Dumping ftrace buffer:\n");
9182
9183 /* Did function tracer already get disabled? */
9184 if (ftrace_is_dead()) {
9185 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9186 printk("# MAY BE MISSING FUNCTION EVENTS\n");
9187 }
9188
9189 /*
9190 * We need to stop all tracing on all CPUs to read
9191 * the next buffer. This is a bit expensive, but is
9192 * not done often. We read everything we can,
9193 * and then release the locks again.
9194 */
9195
9196 while (!trace_empty(&iter)) {
9197
9198 if (!cnt)
9199 printk(KERN_TRACE "---------------------------------\n");
9200
9201 cnt++;
9202
9203 trace_iterator_reset(&iter);
9204 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9205
9206 if (trace_find_next_entry_inc(&iter) != NULL) {
9207 int ret;
9208
9209 ret = print_trace_line(&iter);
9210 if (ret != TRACE_TYPE_NO_CONSUME)
9211 trace_consume(&iter);
9212 }
9213 touch_nmi_watchdog();
9214
9215 trace_printk_seq(&iter.seq);
9216 }
9217
9218 if (!cnt)
9219 printk(KERN_TRACE " (ftrace buffer empty)\n");
9220 else
9221 printk(KERN_TRACE "---------------------------------\n");
9222
9223 out_enable:
9224 tr->trace_flags |= old_userobj;
9225
9226 for_each_tracing_cpu(cpu) {
9227 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
9228 }
9229 atomic_dec(&dump_running);
9230 printk_nmi_direct_exit();
9231 local_irq_restore(flags);
9232}
9233EXPORT_SYMBOL_GPL(ftrace_dump);
9234
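/*
 * ftrace_dump() is normally reached from the panic and die notifiers
 * registered in tracer_alloc_buffers() when ftrace_dump_on_oops is set, or by
 * hand via sysrq-z. Illustrative ways to use it (a sketch):
 *
 *   ftrace_dump_on_oops                               (kernel command line)
 *   # echo 1 > /proc/sys/kernel/ftrace_dump_on_oops   (sysctl at run time)
 *   # echo z > /proc/sysrq-trigger                    (dump the buffer now)
 */
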
9235int trace_run_command(const char *buf, int (*createfn)(int, char **))
9236{
9237 char **argv;
9238 int argc, ret;
9239
9240 argc = 0;
9241 ret = 0;
9242 argv = argv_split(GFP_KERNEL, buf, &argc);
9243 if (!argv)
9244 return -ENOMEM;
9245
9246 if (argc)
9247 ret = createfn(argc, argv);
9248
9249 argv_free(argv);
9250
9251 return ret;
9252}
9253
9254#define WRITE_BUFSIZE 4096
9255
9256ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9257 size_t count, loff_t *ppos,
9258 int (*createfn)(int, char **))
9259{
9260 char *kbuf, *buf, *tmp;
9261 int ret = 0;
9262 size_t done = 0;
9263 size_t size;
9264
9265 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9266 if (!kbuf)
9267 return -ENOMEM;
9268
9269 while (done < count) {
9270 size = count - done;
9271
9272 if (size >= WRITE_BUFSIZE)
9273 size = WRITE_BUFSIZE - 1;
9274
9275 if (copy_from_user(kbuf, buffer + done, size)) {
9276 ret = -EFAULT;
9277 goto out;
9278 }
9279 kbuf[size] = '\0';
9280 buf = kbuf;
9281 do {
9282 tmp = strchr(buf, '\n');
9283 if (tmp) {
9284 *tmp = '\0';
9285 size = tmp - buf + 1;
9286 } else {
9287 size = strlen(buf);
9288 if (done + size < count) {
9289 if (buf != kbuf)
9290 break;
9291 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9292 pr_warn("Line length is too long: Should be less than %d\n",
9293 WRITE_BUFSIZE - 2);
9294 ret = -EINVAL;
9295 goto out;
9296 }
9297 }
9298 done += size;
9299
9300 /* Remove comments */
9301 tmp = strchr(buf, '#');
9302
9303 if (tmp)
9304 *tmp = '\0';
9305
9306 ret = trace_run_command(buf, createfn);
9307 if (ret)
9308 goto out;
9309 buf += size;
9310
9311 } while (done < count);
9312 }
9313 ret = done;
9314
9315out:
9316 kfree(kbuf);
9317
9318 return ret;
9319}
9320
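/*
 * trace_parse_run_command() is the write-side parser behind command-style
 * tracefs files such as kprobe_events and uprobe_events: the input is split
 * into '\n'-terminated lines, '#' comments are stripped, and each remaining
 * line is passed to createfn as an argv. Illustrative usage of one such file
 * (a sketch, probe name chosen arbitrarily):
 *
 *   # echo 'p:myopen do_sys_open' > /sys/kernel/tracing/kprobe_events
 *   # echo '-:myopen' > /sys/kernel/tracing/kprobe_events   # remove it again
 */
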
9321__init static int tracer_alloc_buffers(void)
9322{
9323 int ring_buf_size;
9324 int ret = -ENOMEM;
9325
9326
9327 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9328 pr_warning("Tracing disabled due to lockdown\n");
9329 return -EPERM;
9330 }
9331
9332 /*
9333 * Make sure we don't accidentally add more trace options
9334 * than we have bits for.
9335 */
9336 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9337
9338 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9339 goto out;
9340
9341 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9342 goto out_free_buffer_mask;
9343
9344 /* Only allocate trace_printk buffers if a trace_printk exists */
9345 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9346 /* Must be called before global_trace.buffer is allocated */
9347 trace_printk_init_buffers();
9348
9349 /* To save memory, keep the ring buffer size to its minimum */
9350 if (ring_buffer_expanded)
9351 ring_buf_size = trace_buf_size;
9352 else
9353 ring_buf_size = 1;
9354
9355 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9356 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9357
9358 raw_spin_lock_init(&global_trace.start_lock);
9359
9360 /*
9361 * The prepare callback allocates some memory for the ring buffer. We
9362 * don't free the buffer if the CPU goes down. If we were to free
9363 * the buffer, then the user would lose any trace that was in the
9364 * buffer. The memory will be removed once the "instance" is removed.
9365 */
9366 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9367 "trace/RB:prepare", trace_rb_cpu_prepare,
9368 NULL);
9369 if (ret < 0)
9370 goto out_free_cpumask;
9371 /* Used for event triggers */
9372 ret = -ENOMEM;
9373 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9374 if (!temp_buffer)
9375 goto out_rm_hp_state;
9376
9377 if (trace_create_savedcmd() < 0)
9378 goto out_free_temp_buffer;
9379
9380 /* TODO: make the number of buffers hot pluggable with CPUS */
9381 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9382 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
9383 WARN_ON(1);
9384 goto out_free_savedcmd;
9385 }
9386
9387 if (global_trace.buffer_disabled)
9388 tracing_off();
9389
9390 if (trace_boot_clock) {
9391 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9392 if (ret < 0)
9393 pr_warn("Trace clock %s not defined, going back to default\n",
9394 trace_boot_clock);
9395 }
9396
9397 /*
9398 * register_tracer() might reference current_trace, so it
9399 * needs to be set before we register anything. This is
9400 * just a bootstrap of current_trace anyway.
9401 */
9402 global_trace.current_trace = &nop_trace;
9403
9404 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9405
9406 ftrace_init_global_array_ops(&global_trace);
9407
9408 init_trace_flags_index(&global_trace);
9409
9410 register_tracer(&nop_trace);
9411
9412 /* Function tracing may start here (via kernel command line) */
9413 init_function_trace();
9414
9415 /* All seems OK, enable tracing */
9416 tracing_disabled = 0;
9417
9418 atomic_notifier_chain_register(&panic_notifier_list,
9419 &trace_panic_notifier);
9420
9421 register_die_notifier(&trace_die_notifier);
9422
9423 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9424
9425 INIT_LIST_HEAD(&global_trace.systems);
9426 INIT_LIST_HEAD(&global_trace.events);
9427 INIT_LIST_HEAD(&global_trace.hist_vars);
9428 INIT_LIST_HEAD(&global_trace.err_log);
9429 list_add(&global_trace.list, &ftrace_trace_arrays);
9430
9431 apply_trace_boot_options();
9432
9433 register_snapshot_cmd();
9434
9435 return 0;
9436
9437out_free_savedcmd:
9438 free_saved_cmdlines_buffer(savedcmd);
9439out_free_temp_buffer:
9440 ring_buffer_free(temp_buffer);
9441out_rm_hp_state:
9442 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9443out_free_cpumask:
9444 free_cpumask_var(global_trace.tracing_cpumask);
9445out_free_buffer_mask:
9446 free_cpumask_var(tracing_buffer_mask);
9447out:
9448 return ret;
9449}
9450
9451void __init early_trace_init(void)
9452{
9453 if (tracepoint_printk) {
9454 tracepoint_print_iter =
9455 kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9456 if (WARN_ON(!tracepoint_print_iter))
9457 tracepoint_printk = 0;
9458 else
9459 static_key_enable(&tracepoint_printk_key.key);
9460 }
9461 tracer_alloc_buffers();
9462
9463 init_events();
9464}
9465
9466void __init trace_init(void)
9467{
9468 trace_event_init();
9469}
9470
9471__init static int clear_boot_tracer(void)
9472{
9473 /*
9474 * The default bootup tracer name is stored in an init section
9475 * buffer. This function is called as a late initcall. If we did not
9476 * find the boot tracer, then clear it out, to prevent
9477 * later registration from accessing the buffer that is
9478 * about to be freed.
9479 */
9480 if (!default_bootup_tracer)
9481 return 0;
9482
9483 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9484 default_bootup_tracer);
9485 default_bootup_tracer = NULL;
9486
9487 return 0;
9488}
9489
9490fs_initcall(tracer_init_tracefs);
9491late_initcall_sync(clear_boot_tracer);
9492
9493#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9494__init static int tracing_set_default_clock(void)
9495{
9496 /* sched_clock_stable() is determined in late_initcall */
9497 if (!trace_boot_clock && !sched_clock_stable()) {
9498 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9499 pr_warn("Can not set tracing clock due to lockdown\n");
9500 return -EPERM;
9501 }
9502
9503 printk(KERN_WARNING
9504 "Unstable clock detected, switching default tracing clock to \"global\"\n"
9505 "If you want to keep using the local clock, then add:\n"
9506 " \"trace_clock=local\"\n"
9507 "on the kernel command line\n");
9508 tracing_set_clock(&global_trace, "global");
9509 }
9510
9511 return 0;
9512}
9513late_initcall_sync(tracing_set_default_clock);
9514#endif