root/samples/bpf/cpustat_kern.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. find_cpu_pstate_idx
  2. SEC
  3. SEC

   1 // SPDX-License-Identifier: GPL-2.0
   2 
   3 #include <linux/version.h>
   4 #include <linux/ptrace.h>
   5 #include <uapi/linux/bpf.h>
   6 #include "bpf_helpers.h"
   7 
   8 /*
   9  * The CPU number, cstate number and pstate number are based
  10  * on 96boards Hikey with octa CA53 CPUs.
  11  *
 * Each CPU has three idle states (cstates):
 *   WFI, CPU_OFF, CLUSTER_OFF
 *
 * Each CPU has 5 operating points (pstates):
 *   208MHz, 432MHz, 729MHz, 960MHz, 1200MHz
  17  *
 * This code is based on these assumptions; other platforms
 * need to adjust these definitions.
  20  */
  21 #define MAX_CPU                 8
  22 #define MAX_PSTATE_ENTRIES      5
  23 #define MAX_CSTATE_ENTRIES      3
  24 
  25 static int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 };
  26 
  27 /*
  28  * my_map structure is used to record cstate and pstate index and
  29  * timestamp (Idx, Ts), when new event incoming we need to update
  30  * combination for new state index and timestamp (Idx`, Ts`).
  31  *
  32  * Based on (Idx, Ts) and (Idx`, Ts`) we can calculate the time
  33  * interval for the previous state: Duration(Idx) = Ts` - Ts.
  34  *
 * Every CPU has one below array for recording state index and
 * timestamp, and records cstate and pstate separately:
  37  *
  38  * +--------------------------+
  39  * | cstate timestamp         |
  40  * +--------------------------+
  41  * | cstate index             |
  42  * +--------------------------+
  43  * | pstate timestamp         |
  44  * +--------------------------+
  45  * | pstate index             |
  46  * +--------------------------+
  47  */
  48 #define MAP_OFF_CSTATE_TIME     0
  49 #define MAP_OFF_CSTATE_IDX      1
  50 #define MAP_OFF_PSTATE_TIME     2
  51 #define MAP_OFF_PSTATE_IDX      3
  52 #define MAP_OFF_NUM             4
  53 
/*
 * Per-CPU scratch state: MAP_OFF_NUM consecutive u64 slots per CPU
 * (cstate ts, cstate idx, pstate ts, pstate idx — layout above).
 */
struct bpf_map_def SEC("maps") my_map = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(u32),	/* flat index: cpu * MAP_OFF_NUM + offset */
	.value_size = sizeof(u64),	/* timestamp in ns, or a state index */
	.max_entries = MAX_CPU * MAP_OFF_NUM,
};
  60 
  61 /* cstate_duration records duration time for every idle state per CPU */
/* cstate_duration records duration time for every idle state per CPU */
struct bpf_map_def SEC("maps") cstate_duration = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(u32),	/* flat index: cpu * MAX_CSTATE_ENTRIES + cstate */
	.value_size = sizeof(u64),	/* accumulated time in the cstate, ns */
	.max_entries = MAX_CPU * MAX_CSTATE_ENTRIES,
};
  68 
  69 /* pstate_duration records duration time for every operating point per CPU */
/* pstate_duration records duration time for every operating point per CPU */
struct bpf_map_def SEC("maps") pstate_duration = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(u32),	/* flat index: cpu * MAX_PSTATE_ENTRIES + pstate */
	.value_size = sizeof(u64),	/* accumulated time at the OPP, ns */
	.max_entries = MAX_CPU * MAX_PSTATE_ENTRIES,
};
  76 
  77 /*
  78  * The trace events for cpu_idle and cpu_frequency are taken from:
  79  * /sys/kernel/debug/tracing/events/power/cpu_idle/format
  80  * /sys/kernel/debug/tracing/events/power/cpu_frequency/format
  81  *
  82  * These two events have same format, so define one common structure.
  83  */
struct cpu_args {
	u64 pad;	/* common tracepoint header, unused here */
	u32 state;	/* cstate index or target frequency; (u32)-1 means idle exit */
	u32 cpu_id;	/* CPU the event refers to */
};
  89 
  90 /* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */
  91 static u32 find_cpu_pstate_idx(u32 frequency)
  92 {
  93         u32 i;
  94 
  95         for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) {
  96                 if (frequency == cpu_opps[i])
  97                         return i;
  98         }
  99 
 100         return i;
 101 }
 102 
 103 SEC("tracepoint/power/cpu_idle")
 104 int bpf_prog1(struct cpu_args *ctx)
 105 {
 106         u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta;
 107         u32 key, cpu, pstate_idx;
 108         u64 *val;
 109 
 110         if (ctx->cpu_id > MAX_CPU)
 111                 return 0;
 112 
 113         cpu = ctx->cpu_id;
 114 
 115         key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME;
 116         cts = bpf_map_lookup_elem(&my_map, &key);
 117         if (!cts)
 118                 return 0;
 119 
 120         key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
 121         cstate = bpf_map_lookup_elem(&my_map, &key);
 122         if (!cstate)
 123                 return 0;
 124 
 125         key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
 126         pts = bpf_map_lookup_elem(&my_map, &key);
 127         if (!pts)
 128                 return 0;
 129 
 130         key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
 131         pstate = bpf_map_lookup_elem(&my_map, &key);
 132         if (!pstate)
 133                 return 0;
 134 
 135         prev_state = *cstate;
 136         *cstate = ctx->state;
 137 
 138         if (!*cts) {
 139                 *cts = bpf_ktime_get_ns();
 140                 return 0;
 141         }
 142 
 143         cur_ts = bpf_ktime_get_ns();
 144         delta = cur_ts - *cts;
 145         *cts = cur_ts;
 146 
 147         /*
 148          * When state doesn't equal to (u32)-1, the cpu will enter
 149          * one idle state; for this case we need to record interval
 150          * for the pstate.
 151          *
 152          *                 OPP2
 153          *            +---------------------+
 154          *     OPP1   |                     |
 155          *   ---------+                     |
 156          *                                  |  Idle state
 157          *                                  +---------------
 158          *
 159          *            |<- pstate duration ->|
 160          *            ^                     ^
 161          *           pts                  cur_ts
 162          */
 163         if (ctx->state != (u32)-1) {
 164 
 165                 /* record pstate after have first cpu_frequency event */
 166                 if (!*pts)
 167                         return 0;
 168 
 169                 delta = cur_ts - *pts;
 170 
 171                 pstate_idx = find_cpu_pstate_idx(*pstate);
 172                 if (pstate_idx >= MAX_PSTATE_ENTRIES)
 173                         return 0;
 174 
 175                 key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
 176                 val = bpf_map_lookup_elem(&pstate_duration, &key);
 177                 if (val)
 178                         __sync_fetch_and_add((long *)val, delta);
 179 
 180         /*
 181          * When state equal to (u32)-1, the cpu just exits from one
 182          * specific idle state; for this case we need to record
 183          * interval for the pstate.
 184          *
 185          *       OPP2
 186          *   -----------+
 187          *              |                          OPP1
 188          *              |                     +-----------
 189          *              |     Idle state      |
 190          *              +---------------------+
 191          *
 192          *              |<- cstate duration ->|
 193          *              ^                     ^
 194          *             cts                  cur_ts
 195          */
 196         } else {
 197 
 198                 key = cpu * MAX_CSTATE_ENTRIES + prev_state;
 199                 val = bpf_map_lookup_elem(&cstate_duration, &key);
 200                 if (val)
 201                         __sync_fetch_and_add((long *)val, delta);
 202         }
 203 
 204         /* Update timestamp for pstate as new start time */
 205         if (*pts)
 206                 *pts = cur_ts;
 207 
 208         return 0;
 209 }
 210 
 211 SEC("tracepoint/power/cpu_frequency")
 212 int bpf_prog2(struct cpu_args *ctx)
 213 {
 214         u64 *pts, *cstate, *pstate, prev_state, cur_ts, delta;
 215         u32 key, cpu, pstate_idx;
 216         u64 *val;
 217 
 218         cpu = ctx->cpu_id;
 219 
 220         key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
 221         pts = bpf_map_lookup_elem(&my_map, &key);
 222         if (!pts)
 223                 return 0;
 224 
 225         key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
 226         pstate = bpf_map_lookup_elem(&my_map, &key);
 227         if (!pstate)
 228                 return 0;
 229 
 230         key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
 231         cstate = bpf_map_lookup_elem(&my_map, &key);
 232         if (!cstate)
 233                 return 0;
 234 
 235         prev_state = *pstate;
 236         *pstate = ctx->state;
 237 
 238         if (!*pts) {
 239                 *pts = bpf_ktime_get_ns();
 240                 return 0;
 241         }
 242 
 243         cur_ts = bpf_ktime_get_ns();
 244         delta = cur_ts - *pts;
 245         *pts = cur_ts;
 246 
 247         /* When CPU is in idle, bail out to skip pstate statistics */
 248         if (*cstate != (u32)(-1))
 249                 return 0;
 250 
 251         /*
 252          * The cpu changes to another different OPP (in below diagram
 253          * change frequency from OPP3 to OPP1), need recording interval
 254          * for previous frequency OPP3 and update timestamp as start
 255          * time for new frequency OPP1.
 256          *
 257          *                 OPP3
 258          *            +---------------------+
 259          *     OPP2   |                     |
 260          *   ---------+                     |
 261          *                                  |    OPP1
 262          *                                  +---------------
 263          *
 264          *            |<- pstate duration ->|
 265          *            ^                     ^
 266          *           pts                  cur_ts
 267          */
 268         pstate_idx = find_cpu_pstate_idx(*pstate);
 269         if (pstate_idx >= MAX_PSTATE_ENTRIES)
 270                 return 0;
 271 
 272         key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
 273         val = bpf_map_lookup_elem(&pstate_duration, &key);
 274         if (val)
 275                 __sync_fetch_and_add((long *)val, delta);
 276 
 277         return 0;
 278 }
 279 
char _license[] SEC("license") = "GPL";	/* required to use GPL-only BPF helpers */
u32 _version SEC("version") = LINUX_VERSION_CODE;	/* kernel version check at load time */

/* [<][>][^][v][top][bottom][index][help] */