root/tools/perf/examples/bpf/augmented_raw_syscalls.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. augmented__output
  2. augmented_arg__read_str
  3. SEC
  4. SEC
  5. SEC
  6. SEC
  7. SEC
  8. SEC
  9. SEC
  10. SEC
  11. SEC

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Augment the raw_syscalls tracepoints with the contents of the pointer arguments.
   4  *
   5  * Test it with:
   6  *
   7  * perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat /etc/passwd > /dev/null
   8  *
   9  * This exactly matches what is marshalled into the raw_syscalls:sys_enter
  10  * payload expected by the 'perf trace' beautifiers.
  11  *
  12  * For now it just uses the existing tracepoint augmentation code in 'perf
  13  * trace', in the next csets we'll hook up these with the sys_enter/sys_exit
  14  * code that will combine entry/exit in a strace like way.
  15  */
  16 
  17 #include <unistd.h>
  18 #include <linux/limits.h>
  19 #include <linux/socket.h>
  20 #include <pid_filter.h>
  21 
  22 /* bpf-output associated map: the PERF_EVENT_ARRAY that augmented__output() writes the payloads to */
  23 bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__);
  24 
  25 /*
  26  * Per-syscall settings, the value type of the 'syscalls' map.
  27  * string_args_len: one per syscall arg, 0 means not a string or don't copy it,
  28  *                  PATH_MAX for copying everything, else limit it a la 'strace -s strsize'.
  29  */
  30 struct syscall {
  31         bool    enabled;            /* NOTE(review): not read by the programs visible in this file */
  32         u16     string_args_len[6]; /* NOTE(review): likewise not read here, augmenters pass sizeof(->value) */
  33 };
  34 
  35 bpf_map(syscalls, ARRAY, int, struct syscall, 512); /* presumably keyed by syscall number and filled in from userspace - TODO confirm */
  36 
  37 /*
  38  * What to augment at entry? Pointer arg payloads (filenames, etc) passed from
  39  * userspace to the kernel. sys_enter() tail calls into this PROG_ARRAY using
  40  * the syscall number as the index.
  41  */
  42 bpf_map(syscalls_sys_enter, PROG_ARRAY, u32, u32, 512);
  43 
  44 /*
  45  * What to augment at exit? Pointer arg payloads returned from the kernel
  46  * (struct stat, etc) to userspace. sys_exit() tail calls into this PROG_ARRAY
  47  * using the syscall number as the index.
  48  */
  49 bpf_map(syscalls_sys_exit, PROG_ARRAY, u32, u32, 512);
  50 
  51 struct syscall_enter_args { /* what the sys_enter programs receive as their context */
  52         unsigned long long common_tp_fields;
  53         long               syscall_nr; /* index used by sys_enter() for the syscalls_sys_enter tail call */
  54         unsigned long      args[6];    /* raw syscall args; the augmenters cast the pointer ones and copy what they point to */
  55 };
  56 
  57 struct syscall_exit_args { /* what sys_exit() receives as its context */
  58         unsigned long long common_tp_fields;
  59         long               syscall_nr; /* index used by sys_exit() for the syscalls_sys_exit tail call */
  60         long               ret;        /* presumably the syscall return value - not read in this file */
  61 };
  62 
  63 struct augmented_arg { /* one copied string argument, filled in by augmented_arg__read_str() */
  64         unsigned int    size;            /* the probe_read_str() result when it is > 0, zero otherwise */
  65         int             err;             /* the probe_read_str() result when it is <= 0, zero otherwise */
  66         char            value[PATH_MAX]; /* destination buffer of the probe_read_str() copy */
  67 };
  68 
  69 pid_filter(pids_filtered); /* sys_enter()/sys_exit() return 0 right away when pid_filter__has() finds getpid() in here */
  70 
  71 struct augmented_args_payload { /* what augmented__output() emits a prefix of; also the augmented_args_tmp value type */
  72        struct syscall_enter_args args; /* raw tracepoint args, copied in by sys_enter() */
  73        union { /* what follows the raw args depends on the augmenter */
  74                 struct {
  75                         struct augmented_arg arg, arg2; /* string records; the rename augmenters pack the second right after the first one's used bytes */
  76                 };
  77                 struct sockaddr_storage saddr; /* socket address copied by the connect/sendto augmenters */
  78         };
  79 };
  80 
  81 // We need more tmp space than the BPF stack can give us: single entry PERCPU_ARRAY used as scratch for the payload
  82 bpf_map(augmented_args_tmp, PERCPU_ARRAY, int, struct augmented_args_payload, 1);
  83 
  84 static inline struct augmented_args_payload *augmented_args_payload(void)
  85 { /* Looks up the scratch payload in augmented_args_tmp; the callers check the result against NULL */
  86         int only_slot = 0; /* augmented_args_tmp is declared with a single entry */
  87         return bpf_map_lookup_elem(&augmented_args_tmp, &only_slot);
  88 }
  89 
  90 static inline int augmented__output(void *ctx, struct augmented_args_payload *args, int len)
  91 { /* Emits the first 'len' bytes of 'args' through the __augmented_syscalls__ map, with BPF_F_CURRENT_CPU */
  92         /* The perf_event_output result is returned as-is: if it fails, that is non-zero, so that it gets recorded unaugmented */
  93         return perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len);
  94 }
  95 
  96 static inline /* Copies the string at 'arg' (at most 'arg_len' bytes) into augmented_arg->value, returns how many bytes of *augmented_arg to emit */
  97 unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len)
  98 {
  99         unsigned int augmented_len = sizeof(*augmented_arg);
 100         int string_len = probe_read_str(&augmented_arg->value, arg_len, arg);
 101 
 102         augmented_arg->size = augmented_arg->err = 0;
 103         /*
 104          * probe_read_str may return <= 0, e.g. -EFAULT: only a positive result
 105          * is stored in augmented_arg->size, anything else goes to ->err.
 106          */
 107         if (string_len > 0) {
 108                 augmented_len -= sizeof(augmented_arg->value) - string_len; /* drop the unused tail of ->value */
 109                 augmented_len &= sizeof(augmented_arg->value) - 1; /* NOTE(review): presumably a bound for the BPF verifier; assuming sizeof(->value) is a power of two it also wraps any augmented_len >= sizeof(->value) - confirm */
 110                 augmented_arg->size = string_len;
 111         } else {
 112                 /*
 113                  * So that userspace notices the error while still being able
 114                  * to skip this augmented arg record: only the size/err header is kept
 115                  */
 116                 augmented_arg->err = string_len;
 117                 augmented_len = offsetof(struct augmented_arg, value);
 118         }
 119 
 120         return augmented_len;
 121 }
 122 
 123 SEC("!raw_syscalls:unaugmented")
 124 int syscall_unaugmented(struct syscall_enter_args *args)
 125 { /* Default augmenter: copies nothing and returns 1, so that the unaugmented tracepoint payload is used */
 126         return 1;
 127 }
 128 
 129 /*
 130  * These will be tail_called from SEC("raw_syscalls:sys_enter"), so will find in
 131  * augmented_args_tmp what was read by that raw_syscalls:sys_enter and go
 132  * on from there, copying what the pointer args they care about point to,
 133  * e.g. connect's sockaddr or open's filename.
 134  */
 135 SEC("!syscalls:sys_enter_connect")
 136 int sys_enter_connect(struct syscall_enter_args *args)
 137 { /* Augments connect with a copy of its sockaddr: pointer taken from args[1], length from args[2] */
 138         struct augmented_args_payload *augmented_args = augmented_args_payload();
 139         const void *sockaddr_arg = (const void *)args->args[1];
 140         unsigned int socklen = args->args[2];
 141         unsigned int len = sizeof(augmented_args->args);
 142 
 143         if (augmented_args == NULL)
 144                 return 1; /* Failure: don't filter */
 145         /* Never copy more than what fits in the payload's saddr member */
 146         if (socklen > sizeof(augmented_args->saddr))
 147                 socklen = sizeof(augmented_args->saddr);
 148         /* NOTE(review): the probe_read() result is ignored, the payload is emitted even if the copy fails */
 149         probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
 150         /* Emit the raw args followed by the socklen bytes of the address */
 151         return augmented__output(args, augmented_args, len + socklen);
 152 }
 153 
 154 SEC("!syscalls:sys_enter_sendto")
 155 int sys_enter_sendto(struct syscall_enter_args *args)
 156 { /* Augments sendto with a copy of its sockaddr: pointer taken from args[4], length from args[5] */
 157         struct augmented_args_payload *augmented_args = augmented_args_payload();
 158         const void *sockaddr_arg = (const void *)args->args[4];
 159         unsigned int socklen = args->args[5];
 160         unsigned int len = sizeof(augmented_args->args);
 161 
 162         if (augmented_args == NULL)
 163                 return 1; /* Failure: don't filter */
 164         /* Never copy more than what fits in the payload's saddr member */
 165         if (socklen > sizeof(augmented_args->saddr))
 166                 socklen = sizeof(augmented_args->saddr);
 167         /* NOTE(review): the probe_read() result is ignored, the payload is emitted even if the copy fails */
 168         probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
 169         /* Emit the raw args followed by the socklen bytes of the address */
 170         return augmented__output(args, augmented_args, len + socklen);
 171 }
 172 
 173 SEC("!syscalls:sys_enter_open")
 174 int sys_enter_open(struct syscall_enter_args *args)
 175 {
 176         /* Augments open: the filename pointer is the first syscall arg, args[0] */
 177         struct augmented_args_payload *payload = augmented_args_payload();
 178         unsigned int out_len = sizeof(payload->args);
 179 
 180         if (!payload)
 181                 return 1; /* Failure: don't filter */
 182 
 183         /* The string record, with its size/err header, sits right after the raw args */
 184         out_len += augmented_arg__read_str(&payload->arg, (const void *)args->args[0], sizeof(payload->arg.value));
 185         return augmented__output(args, payload, out_len);
 186 }
 187 
 188 SEC("!syscalls:sys_enter_openat")
 189 int sys_enter_openat(struct syscall_enter_args *args)
 190 {
 191         /* Augments openat: the filename pointer is the second syscall arg, args[1] */
 192         struct augmented_args_payload *payload = augmented_args_payload();
 193         unsigned int out_len = sizeof(payload->args);
 194 
 195         if (!payload)
 196                 return 1; /* Failure: don't filter */
 197 
 198         /* The string record, with its size/err header, sits right after the raw args */
 199         out_len += augmented_arg__read_str(&payload->arg, (const void *)args->args[1], sizeof(payload->arg.value));
 200         return augmented__output(args, payload, out_len);
 201 }
 202 
 203 SEC("!syscalls:sys_enter_rename")
 204 int sys_enter_rename(struct syscall_enter_args *args)
 205 {
 206         struct augmented_args_payload *payload = augmented_args_payload();
 207         unsigned int first_len, second_len;
 208         void *second; /* where the record for the new path (args[1]) starts */
 209 
 210         if (!payload)
 211                 return 1; /* Failure: don't filter */
 212 
 213         /* The records are packed: the second one starts where the used bytes of the old path (args[0]) one end */
 214         first_len  = augmented_arg__read_str(&payload->arg, (const void *)args->args[0], sizeof(payload->arg.value));
 215         second     = (void *)(&payload->arg) + first_len;
 216         second_len = augmented_arg__read_str(second, (const void *)args->args[1], sizeof(payload->arg.value));
 217         return augmented__output(args, payload, sizeof(payload->args) + first_len + second_len);
 218 }
 219 
 220 SEC("!syscalls:sys_enter_renameat")
 221 int sys_enter_renameat(struct syscall_enter_args *args)
 222 {
 223         struct augmented_args_payload *payload = augmented_args_payload();
 224         unsigned int first_len, second_len;
 225         void *second; /* where the record for the new path (args[3]) starts */
 226 
 227         if (!payload)
 228                 return 1; /* Failure: don't filter */
 229 
 230         /* The records are packed: the second one starts where the used bytes of the old path (args[1]) one end */
 231         first_len  = augmented_arg__read_str(&payload->arg, (const void *)args->args[1], sizeof(payload->arg.value));
 232         second     = (void *)(&payload->arg) + first_len;
 233         second_len = augmented_arg__read_str(second, (const void *)args->args[3], sizeof(payload->arg.value));
 234         return augmented__output(args, payload, sizeof(payload->args) + first_len + second_len);
 235 }
 236 
 237 SEC("raw_syscalls:sys_enter")
 238 int sys_enter(struct syscall_enter_args *args)
 239 {
 240         struct augmented_args_payload *augmented_args;
 241 
 242         /*
 243          * Nothing is emitted directly from here. Unless the current pid is
 244          * filtered, the raw, non-augmented raw_syscalls:sys_enter payload,
 245          * i.e. sizeof(augmented_args->args) bytes, is stashed in the
 246          * augmented_args_tmp scratch area and a syscall specific augmenter
 247          * is tail called.
 248          *
 249          * The augmenters start their output length at that size and add to
 250          * it what they copy right after that initial, non-augmented payload.
 251          */
 252         if (pid_filter__has(&pids_filtered, getpid()))
 253                 return 0; /* Filtered pid: drop the event */
 254 
 255         augmented_args = augmented_args_payload();
 256         if (augmented_args == NULL)
 257                 return 1; /* No scratch space: don't filter */
 258 
 259         /* Snapshot the tracepoint args where the tail called augmenter will find them */
 260         probe_read(&augmented_args->args, sizeof(augmented_args->args), args);
 261 
 262         /*
 263          * Jump to syscall specific augmenter, even if the default one,
 264          * "!raw_syscalls:unaugmented" that will just return 1 to return the
 265          * unaugmented tracepoint payload.
 266          */
 267         bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr);
 268 
 269         // If not found on the PROG_ARRAY syscalls map, then we're filtering it:
 270         return 0;
 271 }
 272 
 273 SEC("raw_syscalls:sys_exit")
 274 int sys_exit(struct syscall_exit_args *args)
 275 {
 276         struct syscall_exit_args snapshot; /* stack copy of the tracepoint args */
 277 
 278         /* Events from filtered pids are dropped right away */
 279         if (pid_filter__has(&pids_filtered, getpid()))
 280                 return 0;
 281 
 282         probe_read(&snapshot, sizeof(snapshot), args);
 283         /*
 284          * Hand over to the return augmenter registered for this syscall number,
 285          * which may well be the default "!raw_syscalls:unaugmented" one, that
 286          * just returns 1 so that the unaugmented tracepoint payload is used.
 287          */
 288         bpf_tail_call(args, &syscalls_sys_exit, snapshot.syscall_nr);
 289 
 290         /* Still here? Nothing in the PROG_ARRAY for this syscall, so filter it */
 291         return 0;
 292 }
 293 
 294 license(GPL); /* presumably declares the license the BPF loader checks before allowing GPL-only helpers - TODO confirm against the license() macro */

/* [<][>][^][v][top][bottom][index][help] */