root/tools/testing/selftests/bpf/progs/strobemeta.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


DEFINITIONS

This source file includes the following definitions.
  1. calc_location
  2. read_int_var
  3. read_str_var
  4. read_map_var
  5. read_strobe_meta
  6. SEC

   1 // SPDX-License-Identifier: GPL-2.0
   2 // Copyright (c) 2019 Facebook
   3 
   4 #include <stdint.h>
   5 #include <stddef.h>
   6 #include <stdbool.h>
   7 #include <linux/bpf.h>
   8 #include <linux/ptrace.h>
   9 #include <linux/sched.h>
  10 #include <linux/types.h>
  11 #include "bpf_helpers.h"
  12 
  13 typedef uint32_t pid_t;
  14 struct task_struct {};
  15 
  16 #define TASK_COMM_LEN 16
  17 #define PERF_MAX_STACK_DEPTH 127
  18 
  19 #define STROBE_TYPE_INVALID 0
  20 #define STROBE_TYPE_INT 1
  21 #define STROBE_TYPE_STR 2
  22 #define STROBE_TYPE_MAP 3
  23 
  24 #define STACK_TABLE_EPOCH_SHIFT 20
  25 #define STROBE_MAX_STR_LEN 1
  26 #define STROBE_MAX_CFGS 32
  27 #define STROBE_MAX_PAYLOAD                                              \
  28         (STROBE_MAX_STRS * STROBE_MAX_STR_LEN +                         \
  29         STROBE_MAX_MAPS * (1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN)
  30 
  31 struct strobe_value_header {
  32         /*
  33          * meaning depends on type:
  34          * 1. int: 0, if value not set, 1 otherwise
  35          * 2. str: 1 always, whether value is set or not is determined by ptr
  36          * 3. map: 1 always, pointer points to additional struct with number
  37          *    of entries (up to STROBE_MAX_MAP_ENTRIES)
  38          */
  39         uint16_t len;
  40         /*
  41          * _reserved might be used for some future fields/flags, but we always
  42          * want to keep strobe_value_header to be 8 bytes, so BPF can read 16
  43          * bytes in one go and get both header and value
  44          */
  45         uint8_t _reserved[6];
  46 };
  47 
  48 /*
  49  * strobe_value_generic is used from BPF probe only, but needs to be a union
  50  * of strobe_value_int/strobe_value_str/strobe_value_map
  51  */
  52 struct strobe_value_generic {
  53         struct strobe_value_header header;
  54         union {
  55                 int64_t val;
  56                 void *ptr;
  57         };
  58 };
  59 
  60 struct strobe_value_int {
  61         struct strobe_value_header header;
  62         int64_t value;
  63 };
  64 
  65 struct strobe_value_str {
  66         struct strobe_value_header header;
  67         const char* value;
  68 };
  69 
  70 struct strobe_value_map {
  71         struct strobe_value_header header;
  72         const struct strobe_map_raw* value;
  73 };
  74 
  75 struct strobe_map_entry {
  76         const char* key;
  77         const char* val;
  78 };
  79 
  80 /*
  81  * Map of C-string key/value pairs with fixed maximum capacity. Each map has
  82  * corresponding int64 ID, which application can use (or ignore) in whatever
  83  * way appropriate. Map is "write-only", there is no way to get data out of
  84  * map. Map is intended to be used to provide metadata for profilers and is
  85  * not to be used for internal in-app communication. All methods are
  86  * thread-safe.
  87  */
  88 struct strobe_map_raw {
  89         /*
  90          * general purpose unique ID that's up to application to decide
  91          * whether and how to use; for request metadata use case id is unique
  92          * request ID that's used to match metadata with stack traces on
  93          * Strobelight backend side
  94          */
  95         int64_t id;
  96         /* number of used entries in map */
  97         int64_t cnt;
  98         /*
  99          * having volatile doesn't change anything on BPF side, but clang
 100          * emits warnings for passing `volatile const char *` into
 101          * bpf_probe_read_str that expects just `const char *`
 102          */
 103         const char* tag;
 104         /*
 105          * key/value entries, each consisting of 2 pointers to key and value
 106          * C strings
 107          */
 108         struct strobe_map_entry entries[STROBE_MAX_MAP_ENTRIES];
 109 };
 110 
 111 /* Following values define supported values of TLS mode */
 112 #define TLS_NOT_SET -1
 113 #define TLS_LOCAL_EXEC 0
 114 #define TLS_IMM_EXEC 1
 115 #define TLS_GENERAL_DYN 2
 116 
 117 /*
 118  * structure that universally represents TLS location (both for static
 119  * executables and shared libraries)
 120  */
 121 struct strobe_value_loc {
 122         /*
 123          * tls_mode defines what TLS mode was used for particular metavariable:
 124          * - -1 (TLS_NOT_SET) - no metavariable;
 125          * - 0 (TLS_LOCAL_EXEC) - Local Executable mode;
 126          * - 1 (TLS_IMM_EXEC) - Immediate Executable mode;
 127          * - 2 (TLS_GENERAL_DYN) - General Dynamic mode;
 128          * Local Dynamic mode is not yet supported, because never seen in
 129          * practice.  Mode defines how offset field is interpreted. See
 130          * calc_location() in below for details.
 131          */
 132         int64_t tls_mode;
 133         /*
 134          * TLS_LOCAL_EXEC: offset from thread pointer (fs:0 for x86-64,
 135          * tpidr_el0 for aarch64).
 136          * TLS_IMM_EXEC: absolute address of GOT entry containing offset
 137          * from thread pointer;
 138          * TLS_GENERAL_DYN: absolute addres of double GOT entry
 139          * containing tls_index_t struct;
 140          */
 141         int64_t offset;
 142 };
 143 
 144 struct strobemeta_cfg {
 145         int64_t req_meta_idx;
 146         struct strobe_value_loc int_locs[STROBE_MAX_INTS];
 147         struct strobe_value_loc str_locs[STROBE_MAX_STRS];
 148         struct strobe_value_loc map_locs[STROBE_MAX_MAPS];
 149 };
 150 
 151 struct strobe_map_descr {
 152         uint64_t id;
 153         int16_t tag_len;
 154         /*
 155          * cnt <0 - map value isn't set;
 156          * 0 - map has id set, but no key/value entries
 157          */
 158         int16_t cnt;
 159         /*
 160          * both key_lens[i] and val_lens[i] should be >0 for present key/value
 161          * entry
 162          */
 163         uint16_t key_lens[STROBE_MAX_MAP_ENTRIES];
 164         uint16_t val_lens[STROBE_MAX_MAP_ENTRIES];
 165 };
 166 
 167 struct strobemeta_payload {
 168         /* req_id has valid request ID, if req_meta_valid == 1 */
 169         int64_t req_id;
 170         uint8_t req_meta_valid;
 171         /*
 172          * mask has Nth bit set to 1, if Nth metavar was present and
 173          * successfully read
 174          */
 175         uint64_t int_vals_set_mask;
 176         int64_t int_vals[STROBE_MAX_INTS];
 177         /* len is >0 for present values */
 178         uint16_t str_lens[STROBE_MAX_STRS];
 179         /* if map_descrs[i].cnt == -1, metavar is not present/set */
 180         struct strobe_map_descr map_descrs[STROBE_MAX_MAPS];
 181         /*
 182          * payload has compactly packed values of str and map variables in the
 183          * form: strval1\0strval2\0map1key1\0map1val1\0map2key1\0map2val1\0
 184          * (and so on); str_lens[i], key_lens[i] and val_lens[i] determines
 185          * value length
 186          */
 187         char payload[STROBE_MAX_PAYLOAD];
 188 };
 189 
 190 struct strobelight_bpf_sample {
 191         uint64_t ktime;
 192         char comm[TASK_COMM_LEN];
 193         pid_t pid;
 194         int user_stack_id;
 195         int kernel_stack_id;
 196         int has_meta;
 197         struct strobemeta_payload metadata;
 198         /*
 199          * makes it possible to pass (<real payload size> + 1) as data size to
 200          * perf_submit() to avoid perf_submit's paranoia about passing zero as
 201          * size, as it deduces that <real payload size> might be
 202          * **theoretically** zero
 203          */
 204         char dummy_safeguard;
 205 };
 206 
 207 struct {
 208         __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
 209         __uint(max_entries, 32);
 210         __uint(key_size, sizeof(int));
 211         __uint(value_size, sizeof(int));
 212 } samples SEC(".maps");
 213 
 214 struct {
 215         __uint(type, BPF_MAP_TYPE_STACK_TRACE);
 216         __uint(max_entries, 16);
 217         __uint(key_size, sizeof(uint32_t));
 218         __uint(value_size, sizeof(uint64_t) * PERF_MAX_STACK_DEPTH);
 219 } stacks_0 SEC(".maps");
 220 
 221 struct {
 222         __uint(type, BPF_MAP_TYPE_STACK_TRACE);
 223         __uint(max_entries, 16);
 224         __uint(key_size, sizeof(uint32_t));
 225         __uint(value_size, sizeof(uint64_t) * PERF_MAX_STACK_DEPTH);
 226 } stacks_1 SEC(".maps");
 227 
 228 struct {
 229         __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
 230         __uint(max_entries, 1);
 231         __type(key, uint32_t);
 232         __type(value, struct strobelight_bpf_sample);
 233 } sample_heap SEC(".maps");
 234 
 235 struct {
 236         __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
 237         __uint(max_entries, STROBE_MAX_CFGS);
 238         __type(key, pid_t);
 239         __type(value, struct strobemeta_cfg);
 240 } strobemeta_cfgs SEC(".maps");
 241 
 242 /* Type for the dtv.  */
 243 /* https://github.com/lattera/glibc/blob/master/nptl/sysdeps/x86_64/tls.h#L34 */
 244 typedef union dtv {
 245         size_t counter;
 246         struct {
 247                 void* val;
 248                 bool is_static;
 249         } pointer;
 250 } dtv_t;
 251 
 252 /* Partial definition for tcbhead_t */
 253 /* https://github.com/bminor/glibc/blob/master/sysdeps/x86_64/nptl/tls.h#L42 */
 254 struct tcbhead {
 255         void* tcb;
 256         dtv_t* dtv;
 257 };
 258 
 259 /*
 260  * TLS module/offset information for shared library case.
 261  * For x86-64, this is mapped onto two entries in GOT.
 262  * For aarch64, this is pointed to by second GOT entry.
 263  */
 264 struct tls_index {
 265         uint64_t module;
 266         uint64_t offset;
 267 };
 268 
 269 static __always_inline void *calc_location(struct strobe_value_loc *loc,
 270                                            void *tls_base)
 271 {
 272         /*
 273          * tls_mode value is:
 274          * - -1 (TLS_NOT_SET), if no metavar is present;
 275          * - 0 (TLS_LOCAL_EXEC), if metavar uses Local Executable mode of TLS
 276          * (offset from fs:0 for x86-64 or tpidr_el0 for aarch64);
 277          * - 1 (TLS_IMM_EXEC), if metavar uses Immediate Executable mode of TLS;
 278          * - 2 (TLS_GENERAL_DYN), if metavar uses General Dynamic mode of TLS;
 279          * This schema allows to use something like:
 280          * (tls_mode + 1) * (tls_base + offset)
 281          * to get NULL for "no metavar" location, or correct pointer for local
 282          * executable mode without doing extra ifs.
 283          */
 284         if (loc->tls_mode <= TLS_LOCAL_EXEC) {
 285                 /* static executable is simple, we just have offset from
 286                  * tls_base */
 287                 void *addr = tls_base + loc->offset;
 288                 /* multiply by (tls_mode + 1) to get NULL, if we have no
 289                  * metavar in this slot */
 290                 return (void *)((loc->tls_mode + 1) * (int64_t)addr);
 291         }
 292         /*
 293          * Other modes are more complicated, we need to jump through few hoops.
 294          *
 295          * For immediate executable mode (currently supported only for aarch64):
 296          *  - loc->offset is pointing to a GOT entry containing fixed offset
 297          *  relative to tls_base;
 298          *
 299          * For general dynamic mode:
 300          *  - loc->offset is pointing to a beginning of double GOT entries;
 301          *  - (for aarch64 only) second entry points to tls_index_t struct;
 302          *  - (for x86-64 only) two GOT entries are already tls_index_t;
 303          *  - tls_index_t->module is used to find start of TLS section in
 304          *  which variable resides;
 305          *  - tls_index_t->offset provides offset within that TLS section,
 306          *  pointing to value of variable.
 307          */
 308         struct tls_index tls_index;
 309         dtv_t *dtv;
 310         void *tls_ptr;
 311 
 312         bpf_probe_read(&tls_index, sizeof(struct tls_index),
 313                        (void *)loc->offset);
 314         /* valid module index is always positive */
 315         if (tls_index.module > 0) {
 316                 /* dtv = ((struct tcbhead *)tls_base)->dtv[tls_index.module] */
 317                 bpf_probe_read(&dtv, sizeof(dtv),
 318                                &((struct tcbhead *)tls_base)->dtv);
 319                 dtv += tls_index.module;
 320         } else {
 321                 dtv = NULL;
 322         }
 323         bpf_probe_read(&tls_ptr, sizeof(void *), dtv);
 324         /* if pointer has (void *)-1 value, then TLS wasn't initialized yet */
 325         return tls_ptr && tls_ptr != (void *)-1
 326                 ? tls_ptr + tls_index.offset
 327                 : NULL;
 328 }
 329 
 330 static __always_inline void read_int_var(struct strobemeta_cfg *cfg,
 331                                          size_t idx, void *tls_base,
 332                                          struct strobe_value_generic *value,
 333                                          struct strobemeta_payload *data)
 334 {
 335         void *location = calc_location(&cfg->int_locs[idx], tls_base);
 336         if (!location)
 337                 return;
 338 
 339         bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
 340         data->int_vals[idx] = value->val;
 341         if (value->header.len)
 342                 data->int_vals_set_mask |= (1 << idx);
 343 }
 344 
 345 static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
 346                                              size_t idx, void *tls_base,
 347                                              struct strobe_value_generic *value,
 348                                              struct strobemeta_payload *data,
 349                                              void *payload)
 350 {
 351         void *location;
 352         uint32_t len;
 353 
 354         data->str_lens[idx] = 0;
 355         location = calc_location(&cfg->str_locs[idx], tls_base);
 356         if (!location)
 357                 return 0;
 358 
 359         bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
 360         len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, value->ptr);
 361         /*
 362          * if bpf_probe_read_str returns error (<0), due to casting to
 363          * unsinged int, it will become big number, so next check is
 364          * sufficient to check for errors AND prove to BPF verifier, that
 365          * bpf_probe_read_str won't return anything bigger than
 366          * STROBE_MAX_STR_LEN
 367          */
 368         if (len > STROBE_MAX_STR_LEN)
 369                 return 0;
 370 
 371         data->str_lens[idx] = len;
 372         return len;
 373 }
 374 
 375 static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
 376                                           size_t idx, void *tls_base,
 377                                           struct strobe_value_generic *value,
 378                                           struct strobemeta_payload *data,
 379                                           void *payload)
 380 {
 381         struct strobe_map_descr* descr = &data->map_descrs[idx];
 382         struct strobe_map_raw map;
 383         void *location;
 384         uint32_t len;
 385         int i;
 386 
 387         descr->tag_len = 0; /* presume no tag is set */
 388         descr->cnt = -1; /* presume no value is set */
 389 
 390         location = calc_location(&cfg->map_locs[idx], tls_base);
 391         if (!location)
 392                 return payload;
 393 
 394         bpf_probe_read(value, sizeof(struct strobe_value_generic), location);
 395         if (bpf_probe_read(&map, sizeof(struct strobe_map_raw), value->ptr))
 396                 return payload;
 397 
 398         descr->id = map.id;
 399         descr->cnt = map.cnt;
 400         if (cfg->req_meta_idx == idx) {
 401                 data->req_id = map.id;
 402                 data->req_meta_valid = 1;
 403         }
 404 
 405         len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, map.tag);
 406         if (len <= STROBE_MAX_STR_LEN) {
 407                 descr->tag_len = len;
 408                 payload += len;
 409         }
 410 
 411 #ifdef NO_UNROLL
 412 #pragma clang loop unroll(disable)
 413 #else
 414 #pragma unroll
 415 #endif
 416         for (int i = 0; i < STROBE_MAX_MAP_ENTRIES; ++i) {
 417                 if (i >= map.cnt)
 418                         break;
 419 
 420                 descr->key_lens[i] = 0;
 421                 len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN,
 422                                          map.entries[i].key);
 423                 if (len <= STROBE_MAX_STR_LEN) {
 424                         descr->key_lens[i] = len;
 425                         payload += len;
 426                 }
 427                 descr->val_lens[i] = 0;
 428                 len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN,
 429                                          map.entries[i].val);
 430                 if (len <= STROBE_MAX_STR_LEN) {
 431                         descr->val_lens[i] = len;
 432                         payload += len;
 433                 }
 434         }
 435 
 436         return payload;
 437 }
 438 
 439 /*
 440  * read_strobe_meta returns NULL, if no metadata was read; otherwise returns
 441  * pointer to *right after* payload ends
 442  */
 443 static __always_inline void *read_strobe_meta(struct task_struct *task,
 444                                               struct strobemeta_payload *data)
 445 {
 446         pid_t pid = bpf_get_current_pid_tgid() >> 32;
 447         struct strobe_value_generic value = {0};
 448         struct strobemeta_cfg *cfg;
 449         void *tls_base, *payload;
 450 
 451         cfg = bpf_map_lookup_elem(&strobemeta_cfgs, &pid);
 452         if (!cfg)
 453                 return NULL;
 454 
 455         data->int_vals_set_mask = 0;
 456         data->req_meta_valid = 0;
 457         payload = data->payload;
 458         /*
 459          * we don't have struct task_struct definition, it should be:
 460          * tls_base = (void *)task->thread.fsbase;
 461          */
 462         tls_base = (void *)task;
 463 
 464 #ifdef NO_UNROLL
 465 #pragma clang loop unroll(disable)
 466 #else
 467 #pragma unroll
 468 #endif
 469         for (int i = 0; i < STROBE_MAX_INTS; ++i) {
 470                 read_int_var(cfg, i, tls_base, &value, data);
 471         }
 472 #ifdef NO_UNROLL
 473 #pragma clang loop unroll(disable)
 474 #else
 475 #pragma unroll
 476 #endif
 477         for (int i = 0; i < STROBE_MAX_STRS; ++i) {
 478                 payload += read_str_var(cfg, i, tls_base, &value, data, payload);
 479         }
 480 #ifdef NO_UNROLL
 481 #pragma clang loop unroll(disable)
 482 #else
 483 #pragma unroll
 484 #endif
 485         for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
 486                 payload = read_map_var(cfg, i, tls_base, &value, data, payload);
 487         }
 488         /*
 489          * return pointer right after end of payload, so it's possible to
 490          * calculate exact amount of useful data that needs to be sent
 491          */
 492         return payload;
 493 }
 494 
 495 SEC("raw_tracepoint/kfree_skb")
 496 int on_event(struct pt_regs *ctx) {
 497         pid_t pid =  bpf_get_current_pid_tgid() >> 32;
 498         struct strobelight_bpf_sample* sample;
 499         struct task_struct *task;
 500         uint32_t zero = 0;
 501         uint64_t ktime_ns;
 502         void *sample_end;
 503 
 504         sample = bpf_map_lookup_elem(&sample_heap, &zero);
 505         if (!sample)
 506                 return 0; /* this will never happen */
 507 
 508         sample->pid = pid;
 509         bpf_get_current_comm(&sample->comm, TASK_COMM_LEN);
 510         ktime_ns = bpf_ktime_get_ns();
 511         sample->ktime = ktime_ns;
 512 
 513         task = (struct task_struct *)bpf_get_current_task();
 514         sample_end = read_strobe_meta(task, &sample->metadata);
 515         sample->has_meta = sample_end != NULL;
 516         sample_end = sample_end ? : &sample->metadata;
 517 
 518         if ((ktime_ns >> STACK_TABLE_EPOCH_SHIFT) & 1) {
 519                 sample->kernel_stack_id = bpf_get_stackid(ctx, &stacks_1, 0);
 520                 sample->user_stack_id = bpf_get_stackid(ctx, &stacks_1, BPF_F_USER_STACK);
 521         } else {
 522                 sample->kernel_stack_id = bpf_get_stackid(ctx, &stacks_0, 0);
 523                 sample->user_stack_id = bpf_get_stackid(ctx, &stacks_0, BPF_F_USER_STACK);
 524         }
 525 
 526         uint64_t sample_size = sample_end - (void *)sample;
 527         /* should always be true */
 528         if (sample_size < sizeof(struct strobelight_bpf_sample))
 529                 bpf_perf_event_output(ctx, &samples, 0, sample, 1 + sample_size);
 530         return 0;
 531 }
 532 
 533 char _license[] SEC("license") = "GPL";

/* [<][>][^][v][top][bottom][index][help] */