root/kernel/trace/trace_stack.c

DEFINITIONS

This source file includes the following definitions.
  1. print_max_stack
  2. check_stack
  3. stack_trace_call
  4. stack_max_size_read
  5. stack_max_size_write
  6. __next
  7. t_next
  8. t_start
  9. t_stop
  10. trace_lookup_stack
  11. print_disabled
  12. t_show
  13. stack_trace_open
  14. stack_trace_filter_open
  15. stack_trace_sysctl
  16. enable_stacktrace
  17. stack_trace_init

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
 *
 */
#include <linux/sched/task_stack.h>
#include <linux/stacktrace.h>
#include <linux/security.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/init.h>

#include <asm/setup.h>

#include "trace.h"

#define STACK_TRACE_ENTRIES 500

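/*
 * stack_dump_trace[] holds the return addresses of the deepest stack
 * trace seen so far; stack_trace_index[] holds, for each of those
 * entries, its offset from the top of the stack.
 */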
static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES];
static unsigned stack_trace_index[STACK_TRACE_ENTRIES];

static unsigned int stack_trace_nr_entries;
static unsigned long stack_trace_max_size;
static arch_spinlock_t stack_trace_max_lock =
        (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

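/*
 * A non-zero disable_stack_tracer keeps the stack tracer from running
 * on this CPU: it serves both as a recursion guard in stack_trace_call()
 * and to keep the tracer away while stack_trace_max_lock is held.
 */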
DEFINE_PER_CPU(int, disable_stack_tracer);
static DEFINE_MUTEX(stack_sysctl_mutex);

int stack_tracer_enabled;

static void print_max_stack(void)
{
        long i;
        int size;

        pr_emerg("        Depth    Size   Location    (%d entries)\n"
                 "        -----    ----   --------\n",
                 stack_trace_nr_entries);

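        /*
         * The last entry's stack offset is also its total size; every
         * other entry's size is the difference between its own offset
         * and that of the next (shallower) entry.
         */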
        for (i = 0; i < stack_trace_nr_entries; i++) {
                if (i + 1 == stack_trace_nr_entries)
                        size = stack_trace_index[i];
                else
                        size = stack_trace_index[i] - stack_trace_index[i+1];

                pr_emerg("%3ld) %8d   %5d   %pS\n", i, stack_trace_index[i],
                                size, (void *)stack_dump_trace[i]);
        }
}

/*
 * The stack tracer watches for a new maximum stack usage at every function
 * call. It registers a callback with ftrace, and in that callback it examines
 * the stack size. It determines the stack size from the variable passed in,
 * which is the address of a local variable in the stack_trace_call() callback
 * function. The stack size is calculated as the distance from the address of
 * that local variable to the top of the current stack. If that size is not
 * greater than the currently saved max stack size, nothing more is done.
 *
 * If the size of the stack is greater than the maximum recorded size, then the
 * following algorithm takes place.
 *
 * For architectures (like x86) that store the function's return address before
 * saving the function's local variables, the stack will look something like
 * this:
 *
 *   [ top of stack ]
 *    0: sys call entry frame
 *   10: return addr to entry code
 *   11: start of sys_foo frame
 *   20: return addr to sys_foo
 *   21: start of kernel_func_bar frame
 *   30: return addr to kernel_func_bar
 *   31: [ do trace stack here ]
 *
 * stack_trace_save() is then called, returning all the functions it finds in
 * the current stack, which would be (from the bottom of the stack to the top):
 *
 *   return addr to kernel_func_bar
 *   return addr to sys_foo
 *   return addr to entry code
 *
 * Now, to figure out the size of each function's local variables, the stack is
 * searched for these return addresses. When a match is found, it is added to
 * the stack_dump_trace[] array, and its offset into the stack is saved in the
 * stack_trace_index[] array. The above example would show:
 *
 *        stack_dump_trace[]        |   stack_trace_index[]
 *        ------------------        +   -------------------
 *  return addr to kernel_func_bar  |          30
 *  return addr to sys_foo          |          20
 *  return addr to entry            |          10
 *
 * The print_max_stack() function above uses these values to print the size of
 * each function's portion of the stack.
 *
 *  for (i = 0; i < nr_entries; i++) {
 *     size = i == nr_entries - 1 ? stack_trace_index[i] :
 *                    stack_trace_index[i] - stack_trace_index[i+1];
 *     print "%d %d %d %s\n", i, stack_trace_index[i], size, stack_dump_trace[i];
 *  }
 *
 * The above shows
 *
 *     depth size  location
 *     ----- ----  --------
 *  0    30   10   kernel_func_bar
 *  1    20   10   sys_foo
 *  2    10   10   entry code
 *
 * Now, for architectures that may save the return address after the function's
 * local variables (saving the link register before calling nested functions),
 * the stack will look a little different:
 *
 * [ top of stack ]
 *  0: sys call entry frame
 * 10: start of sys_foo frame
 * 19: return addr to entry code << lr saved before calling kernel_func_bar
 * 20: start of kernel_func_bar frame
 * 29: return addr to sys_foo << lr saved before calling next function
 * 30: [ do trace stack here ]
 *
 * Although the functions returned by stack_trace_save() may be the same, their
 * placement in the stack will be different. Using the same algorithm as above
 * would yield:
 *
 *        stack_dump_trace[]        |   stack_trace_index[]
 *        ------------------        +   -------------------
 *  return addr to kernel_func_bar  |          30
 *  return addr to sys_foo          |          29
 *  return addr to entry            |          19
 *
 * Where the mapping is off by one:
 *
 *   kernel_func_bar stack frame size is 29 - 19, not 30 - 29!
 *
 * To fix this, if the architecture defines ARCH_FTRACE_SHIFT_STACK_TRACER,
 * the values in stack_trace_index[] are shifted by one (each entry takes the
 * offset of the entry below it) and the number of stack trace entries is
 * decremented by one.
 *
 *        stack_dump_trace[]        |   stack_trace_index[]
 *        ------------------        +   -------------------
 *  return addr to kernel_func_bar  |          29
 *  return addr to sys_foo          |          19
 *
 * Although the entry function is no longer displayed, its stack usage is
 * still included in the size reported for the first function (sys_foo).
 */
static void check_stack(unsigned long ip, unsigned long *stack)
{
        unsigned long this_size, flags;
        unsigned long *p, *top, *start;
        static int tracer_frame;
        int frame_size = READ_ONCE(tracer_frame);
        int i, x;

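        /*
         * The stack is THREAD_SIZE aligned, so the low bits of the
         * address of our local variable give its offset from the start
         * of the stack area; subtracting that from THREAD_SIZE yields
         * how much stack is in use below it.
         */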
        this_size = ((unsigned long)stack) & (THREAD_SIZE-1);
        this_size = THREAD_SIZE - this_size;
        /* Remove the frame of the tracer */
        this_size -= frame_size;

        if (this_size <= stack_trace_max_size)
                return;

        /* we do not handle interrupt stacks yet */
        if (!object_is_on_stack(stack))
                return;

        /* Can't do this from NMI context (can cause deadlocks) */
        if (in_nmi())
                return;

        local_irq_save(flags);
        arch_spin_lock(&stack_trace_max_lock);

        /* In case another CPU set the tracer_frame on us */
        if (unlikely(!frame_size))
                this_size -= tracer_frame;

        /* a race could have already updated it */
        if (this_size <= stack_trace_max_size)
                goto out;

        stack_trace_max_size = this_size;

        stack_trace_nr_entries = stack_trace_save(stack_dump_trace,
                                               ARRAY_SIZE(stack_dump_trace) - 1,
                                               0);

        /* Skip over the overhead of the stack tracer itself */
        for (i = 0; i < stack_trace_nr_entries; i++) {
                if (stack_dump_trace[i] == ip)
                        break;
        }

        /*
         * Some archs may not have the passed in ip in the dump.
         * If that happens, we need to show everything.
         */
        if (i == stack_trace_nr_entries)
                i = 0;

        /*
         * Now find where in the stack these are.
         */
        x = 0;
        start = stack;
        top = (unsigned long *)
                (((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);

        /*
         * Loop through all the entries. Some of the entries may for
         * some reason not be found on the stack, so we have to account
         * for them. If they are all there, this loop will only happen
         * once. This code only takes place on a new max, so it is far
         * from a fast path.
         */
        while (i < stack_trace_nr_entries) {
                int found = 0;

                stack_trace_index[x] = this_size;
                p = start;

                for (; p < top && i < stack_trace_nr_entries; p++) {
                        /*
                         * The READ_ONCE_NOCHECK is used to let KASAN know that
                         * this is not a stack-out-of-bounds error.
                         */
                        if ((READ_ONCE_NOCHECK(*p)) == stack_dump_trace[i]) {
                                stack_dump_trace[x] = stack_dump_trace[i++];
                                this_size = stack_trace_index[x++] =
                                        (top - p) * sizeof(unsigned long);
                                found = 1;
                                /* Start the search from here */
                                start = p + 1;
                                /*
                                 * We do not want to show the overhead
                                 * of the stack tracer stack in the
                                 * max stack. If we haven't figured
                                 * out what that is, then figure it out
                                 * now.
                                 */
                                if (unlikely(!tracer_frame)) {
                                        tracer_frame = (p - stack) *
                                                sizeof(unsigned long);
                                        stack_trace_max_size -= tracer_frame;
                                }
                        }
                }

                if (!found)
                        i++;
        }

#ifdef ARCH_FTRACE_SHIFT_STACK_TRACER
        /*
         * Some archs will store the link register before calling
         * nested functions. This means the saved return address
         * comes after the local storage, and we need to shift
         * for that.
         */
        if (x > 1) {
                memmove(&stack_trace_index[0], &stack_trace_index[1],
                        sizeof(stack_trace_index[0]) * (x - 1));
                x--;
        }
#endif

        stack_trace_nr_entries = x;

        if (task_stack_end_corrupted(current)) {
                print_max_stack();
                BUG();
        }

 out:
        arch_spin_unlock(&stack_trace_max_lock);
        local_irq_restore(flags);
}

/* Some archs may not define MCOUNT_INSN_SIZE */
#ifndef MCOUNT_INSN_SIZE
# define MCOUNT_INSN_SIZE 0
#endif

static void
stack_trace_call(unsigned long ip, unsigned long parent_ip,
                 struct ftrace_ops *op, struct pt_regs *pt_regs)
{
        unsigned long stack;

        preempt_disable_notrace();

        /* no atomic needed, this variable is only modified from this CPU */
        __this_cpu_inc(disable_stack_tracer);
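        /*
         * If the count is not 1 after our increment, either we recursed
         * or this CPU is updating state under stack_trace_max_lock.
         */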
        if (__this_cpu_read(disable_stack_tracer) != 1)
                goto out;

        /* If rcu is not watching, then save stack trace can fail */
        if (!rcu_is_watching())
                goto out;

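        /*
         * The ip from ftrace points at the traced function, but the
         * entry recorded in the stack dump is the return address of the
         * mcount/fentry call, MCOUNT_INSN_SIZE further in. Adjust ip so
         * check_stack() can find it in the dump.
         */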
        ip += MCOUNT_INSN_SIZE;

        check_stack(ip, &stack);

 out:
        __this_cpu_dec(disable_stack_tracer);
        /* prevent recursion in schedule */
        preempt_enable_notrace();
}

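/* The ftrace_ops that hooks stack_trace_call() into every traced function */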
static struct ftrace_ops trace_ops __read_mostly =
{
        .func = stack_trace_call,
        .flags = FTRACE_OPS_FL_RECURSION_SAFE,
};

static ssize_t
stack_max_size_read(struct file *filp, char __user *ubuf,
                    size_t count, loff_t *ppos)
{
        unsigned long *ptr = filp->private_data;
        char buf[64];
        int r;

        r = snprintf(buf, sizeof(buf), "%ld\n", *ptr);
        if (r > sizeof(buf))
                r = sizeof(buf);
        return simple_read_from_buffer(ubuf, count, ppos, buf, r);
}

static ssize_t
stack_max_size_write(struct file *filp, const char __user *ubuf,
                     size_t count, loff_t *ppos)
{
        long *ptr = filp->private_data;
        unsigned long val, flags;
        int ret;

        ret = kstrtoul_from_user(ubuf, count, 10, &val);
        if (ret)
                return ret;

        local_irq_save(flags);

        /*
         * If the stack tracer fired while we hold arch_spin_lock() (or
         * in an NMI after taking it), it would deadlock on the lock, so
         * we also need to increase the percpu disable_stack_tracer here.
         */
        __this_cpu_inc(disable_stack_tracer);

        arch_spin_lock(&stack_trace_max_lock);
        *ptr = val;
        arch_spin_unlock(&stack_trace_max_lock);

        __this_cpu_dec(disable_stack_tracer);
        local_irq_restore(flags);

        return count;
}

static const struct file_operations stack_max_size_fops = {
        .open           = tracing_open_generic,
        .read           = stack_max_size_read,
        .write          = stack_max_size_write,
        .llseek         = default_llseek,
};

static void *
__next(struct seq_file *m, loff_t *pos)
{
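        /* *pos == 0 is the SEQ_START_TOKEN header; entries begin at *pos == 1 */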
        long n = *pos - 1;

        if (n >= stack_trace_nr_entries)
                return NULL;

        m->private = (void *)n;
        return &m->private;
}

static void *
t_next(struct seq_file *m, void *v, loff_t *pos)
{
        (*pos)++;
        return __next(m, pos);
}

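/*
 * t_start()/t_stop() bracket the whole seq_file iteration with
 * stack_trace_max_lock held and the stack tracer disabled on this CPU,
 * so the snapshot cannot change while it is being printed.
 */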
static void *t_start(struct seq_file *m, loff_t *pos)
{
        local_irq_disable();

        __this_cpu_inc(disable_stack_tracer);

        arch_spin_lock(&stack_trace_max_lock);

        if (*pos == 0)
                return SEQ_START_TOKEN;

        return __next(m, pos);
}

static void t_stop(struct seq_file *m, void *p)
{
        arch_spin_unlock(&stack_trace_max_lock);

        __this_cpu_dec(disable_stack_tracer);

        local_irq_enable();
}

static void trace_lookup_stack(struct seq_file *m, long i)
{
        unsigned long addr = stack_dump_trace[i];

        seq_printf(m, "%pS\n", (void *)addr);
}

static void print_disabled(struct seq_file *m)
{
        seq_puts(m, "#\n"
                 "#  Stack tracer disabled\n"
                 "#\n"
                 "# To enable the stack tracer, either add 'stacktrace' to the\n"
                 "# kernel command line\n"
                 "# or 'echo 1 > /proc/sys/kernel/stack_tracer_enabled'\n"
                 "#\n");
}

static int t_show(struct seq_file *m, void *v)
{
        long i;
        int size;

        if (v == SEQ_START_TOKEN) {
                seq_printf(m, "        Depth    Size   Location"
                           "    (%d entries)\n"
                           "        -----    ----   --------\n",
                           stack_trace_nr_entries);

                if (!stack_tracer_enabled && !stack_trace_max_size)
                        print_disabled(m);

                return 0;
        }

        i = *(long *)v;

        if (i >= stack_trace_nr_entries)
                return 0;

        if (i + 1 == stack_trace_nr_entries)
                size = stack_trace_index[i];
        else
                size = stack_trace_index[i] - stack_trace_index[i+1];

        seq_printf(m, "%3ld) %8d   %5d   ", i, stack_trace_index[i], size);

        trace_lookup_stack(m, i);

        return 0;
}

static const struct seq_operations stack_trace_seq_ops = {
        .start          = t_start,
        .next           = t_next,
        .stop           = t_stop,
        .show           = t_show,
};

static int stack_trace_open(struct inode *inode, struct file *file)
{
        int ret;

        ret = security_locked_down(LOCKDOWN_TRACEFS);
        if (ret)
                return ret;

        return seq_open(file, &stack_trace_seq_ops);
}

static const struct file_operations stack_trace_fops = {
        .open           = stack_trace_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};

#ifdef CONFIG_DYNAMIC_FTRACE

static int
stack_trace_filter_open(struct inode *inode, struct file *file)
{
        struct ftrace_ops *ops = inode->i_private;

        /* Checks for tracefs lockdown */
        return ftrace_regex_open(ops, FTRACE_ITER_FILTER,
                                 inode, file);
}

static const struct file_operations stack_trace_filter_fops = {
        .open = stack_trace_filter_open,
        .read = seq_read,
        .write = ftrace_filter_write,
        .llseek = tracing_lseek,
        .release = ftrace_regex_release,
};

#endif /* CONFIG_DYNAMIC_FTRACE */

int
stack_trace_sysctl(struct ctl_table *table, int write,
                   void __user *buffer, size_t *lenp,
                   loff_t *ppos)
{
        int was_enabled;
        int ret;

        mutex_lock(&stack_sysctl_mutex);
        was_enabled = !!stack_tracer_enabled;

        ret = proc_dointvec(table, write, buffer, lenp, ppos);

        if (ret || !write || (was_enabled == !!stack_tracer_enabled))
                goto out;

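        /* The enabled state changed: attach or detach the ftrace callback */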
        if (stack_tracer_enabled)
                register_ftrace_function(&trace_ops);
        else
                unregister_ftrace_function(&trace_ops);
 out:
        mutex_unlock(&stack_sysctl_mutex);
        return ret;
}

static char stack_trace_filter_buf[COMMAND_LINE_SIZE+1] __initdata;

static __init int enable_stacktrace(char *str)
{
        int len;

        if ((len = str_has_prefix(str, "_filter=")))
                strncpy(stack_trace_filter_buf, str + len, COMMAND_LINE_SIZE);

        stack_tracer_enabled = 1;
        return 1;
}
__setup("stacktrace", enable_stacktrace);

static __init int stack_trace_init(void)
{
        struct dentry *d_tracer;

        d_tracer = tracing_init_dentry();
        if (IS_ERR(d_tracer))
                return 0;

        trace_create_file("stack_max_size", 0644, d_tracer,
                        &stack_trace_max_size, &stack_max_size_fops);

        trace_create_file("stack_trace", 0444, d_tracer,
                        NULL, &stack_trace_fops);

#ifdef CONFIG_DYNAMIC_FTRACE
        trace_create_file("stack_trace_filter", 0644, d_tracer,
                          &trace_ops, &stack_trace_filter_fops);
#endif

        if (stack_trace_filter_buf[0])
                ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1);

        if (stack_tracer_enabled)
                register_ftrace_function(&trace_ops);

        return 0;
}

device_initcall(stack_trace_init);