This source file includes the following definitions.
- saved_value_cmp
- saved_value_new
- saved_value_delete
- saved_value_lookup
- runtime_stat__init
- runtime_stat__exit
- perf_stat__init_shadow_stats
- evsel_context
- reset_stat
- perf_stat__reset_shadow_stats
- perf_stat__reset_shadow_per_stat
- update_runtime_stat
- perf_stat__update_shadow_stats
- get_ratio_color
- perf_stat__find_event
- perf_stat__collect_metric_expr
- runtime_stat_avg
- runtime_stat_n
- print_stalled_cycles_frontend
- print_stalled_cycles_backend
- print_branch_misses
- print_l1_dcache_misses
- print_l1_icache_misses
- print_dtlb_cache_misses
- print_itlb_cache_misses
- print_ll_cache_misses
- sanitize_val
- td_total_slots
- td_bad_spec
- td_retiring
- td_fe_bound
- td_be_bound
- print_smi_cost
- generic_metric
- perf_stat__print_shadow_stats
1
2 #include <stdio.h>
3 #include "evsel.h"
4 #include "stat.h"
5 #include "color.h"
6 #include "pmu.h"
7 #include "rblist.h"
8 #include "evlist.h"
9 #include "expr.h"
10 #include "metricgroup.h"
11 #include <linux/zalloc.h>
12
13
14
15
16
17
18
19
20
21
22 struct runtime_stat rt_stat;
23 struct stats walltime_nsecs_stats;
24
25 struct saved_value {
26 struct rb_node rb_node;
27 struct evsel *evsel;
28 enum stat_type type;
29 int ctx;
30 int cpu;
31 struct runtime_stat *stat;
32 struct stats stats;
33 u64 metric_total;
34 int metric_other;
35 };
36
37 static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
38 {
39 struct saved_value *a = container_of(rb_node,
40 struct saved_value,
41 rb_node);
42 const struct saved_value *b = entry;
43
44 if (a->cpu != b->cpu)
45 return a->cpu - b->cpu;
46
47
48
49
50
51
52
53
54 if (a->type != b->type)
55 return a->type - b->type;
56
57 if (a->ctx != b->ctx)
58 return a->ctx - b->ctx;
59
60 if (a->evsel == NULL && b->evsel == NULL) {
61 if (a->stat == b->stat)
62 return 0;
63
64 if ((char *)a->stat < (char *)b->stat)
65 return -1;
66
67 return 1;
68 }
69
70 if (a->evsel == b->evsel)
71 return 0;
72 if ((char *)a->evsel < (char *)b->evsel)
73 return -1;
74 return +1;
75 }
76
77 static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
78 const void *entry)
79 {
80 struct saved_value *nd = malloc(sizeof(struct saved_value));
81
82 if (!nd)
83 return NULL;
84 memcpy(nd, entry, sizeof(struct saved_value));
85 return &nd->rb_node;
86 }
87
88 static void saved_value_delete(struct rblist *rblist __maybe_unused,
89 struct rb_node *rb_node)
90 {
91 struct saved_value *v;
92
93 BUG_ON(!rb_node);
94 v = container_of(rb_node, struct saved_value, rb_node);
95 free(v);
96 }
97
98 static struct saved_value *saved_value_lookup(struct evsel *evsel,
99 int cpu,
100 bool create,
101 enum stat_type type,
102 int ctx,
103 struct runtime_stat *st)
104 {
105 struct rblist *rblist;
106 struct rb_node *nd;
107 struct saved_value dm = {
108 .cpu = cpu,
109 .evsel = evsel,
110 .type = type,
111 .ctx = ctx,
112 .stat = st,
113 };
114
115 rblist = &st->value_list;
116
117 nd = rblist__find(rblist, &dm);
118 if (nd)
119 return container_of(nd, struct saved_value, rb_node);
120 if (create) {
121 rblist__add_node(rblist, &dm);
122 nd = rblist__find(rblist, &dm);
123 if (nd)
124 return container_of(nd, struct saved_value, rb_node);
125 }
126 return NULL;
127 }
128
129 void runtime_stat__init(struct runtime_stat *st)
130 {
131 struct rblist *rblist = &st->value_list;
132
133 rblist__init(rblist);
134 rblist->node_cmp = saved_value_cmp;
135 rblist->node_new = saved_value_new;
136 rblist->node_delete = saved_value_delete;
137 }
138
139 void runtime_stat__exit(struct runtime_stat *st)
140 {
141 rblist__exit(&st->value_list);
142 }
143
144 void perf_stat__init_shadow_stats(void)
145 {
146 runtime_stat__init(&rt_stat);
147 }
148
149 static int evsel_context(struct evsel *evsel)
150 {
151 int ctx = 0;
152
153 if (evsel->core.attr.exclude_kernel)
154 ctx |= CTX_BIT_KERNEL;
155 if (evsel->core.attr.exclude_user)
156 ctx |= CTX_BIT_USER;
157 if (evsel->core.attr.exclude_hv)
158 ctx |= CTX_BIT_HV;
159 if (evsel->core.attr.exclude_host)
160 ctx |= CTX_BIT_HOST;
161 if (evsel->core.attr.exclude_idle)
162 ctx |= CTX_BIT_IDLE;
163
164 return ctx;
165 }
166
167 static void reset_stat(struct runtime_stat *st)
168 {
169 struct rblist *rblist;
170 struct rb_node *pos, *next;
171
172 rblist = &st->value_list;
173 next = rb_first_cached(&rblist->entries);
174 while (next) {
175 pos = next;
176 next = rb_next(pos);
177 memset(&container_of(pos, struct saved_value, rb_node)->stats,
178 0,
179 sizeof(struct stats));
180 }
181 }
182
183 void perf_stat__reset_shadow_stats(void)
184 {
185 reset_stat(&rt_stat);
186 memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
187 }
188
/* Reset the running statistics of a caller-supplied store @st. */
void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
{
	struct runtime_stat *target = st;

	reset_stat(target);
}
193
194 static void update_runtime_stat(struct runtime_stat *st,
195 enum stat_type type,
196 int ctx, int cpu, u64 count)
197 {
198 struct saved_value *v = saved_value_lookup(NULL, cpu, true,
199 type, ctx, st);
200
201 if (v)
202 update_stats(&v->stats, count);
203 }
204
205
206
207
208
209
210 void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
211 int cpu, struct runtime_stat *st)
212 {
213 int ctx = evsel_context(counter);
214 u64 count_ns = count;
215 struct saved_value *v;
216
217 count *= counter->scale;
218
219 if (perf_evsel__is_clock(counter))
220 update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
221 else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
222 update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
223 else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
224 update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
225 else if (perf_stat_evsel__is(counter, TRANSACTION_START))
226 update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
227 else if (perf_stat_evsel__is(counter, ELISION_START))
228 update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
229 else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
230 update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
231 ctx, cpu, count);
232 else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
233 update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
234 ctx, cpu, count);
235 else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
236 update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
237 ctx, cpu, count);
238 else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
239 update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
240 ctx, cpu, count);
241 else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
242 update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
243 ctx, cpu, count);
244 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
245 update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
246 ctx, cpu, count);
247 else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
248 update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
249 ctx, cpu, count);
250 else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
251 update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
252 else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
253 update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
254 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
255 update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
256 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
257 update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
258 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
259 update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
260 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
261 update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
262 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
263 update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
264 else if (perf_stat_evsel__is(counter, SMI_NUM))
265 update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
266 else if (perf_stat_evsel__is(counter, APERF))
267 update_runtime_stat(st, STAT_APERF, ctx, cpu, count);
268
269 if (counter->collect_stat) {
270 v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st);
271 update_stats(&v->stats, count);
272 if (counter->metric_leader)
273 v->metric_total += count;
274 } else if (counter->metric_leader) {
275 v = saved_value_lookup(counter->metric_leader,
276 cpu, true, STAT_NONE, 0, st);
277 v->metric_total += count;
278 v->metric_other++;
279 }
280 }
281
282
283 enum grc_type {
284 GRC_STALLED_CYCLES_FE,
285 GRC_STALLED_CYCLES_BE,
286 GRC_CACHE_MISSES,
287 GRC_MAX_NR
288 };
289
290 static const char *get_ratio_color(enum grc_type type, double ratio)
291 {
292 static const double grc_table[GRC_MAX_NR][3] = {
293 [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
294 [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
295 [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 },
296 };
297 const char *color = PERF_COLOR_NORMAL;
298
299 if (ratio > grc_table[type][0])
300 color = PERF_COLOR_RED;
301 else if (ratio > grc_table[type][1])
302 color = PERF_COLOR_MAGENTA;
303 else if (ratio > grc_table[type][2])
304 color = PERF_COLOR_YELLOW;
305
306 return color;
307 }
308
309 static struct evsel *perf_stat__find_event(struct evlist *evsel_list,
310 const char *name)
311 {
312 struct evsel *c2;
313
314 evlist__for_each_entry (evsel_list, c2) {
315 if (!strcasecmp(c2->name, name) && !c2->collect_stat)
316 return c2;
317 }
318 return NULL;
319 }
320
321
322 void perf_stat__collect_metric_expr(struct evlist *evsel_list)
323 {
324 struct evsel *counter, *leader, **metric_events, *oc;
325 bool found;
326 const char **metric_names;
327 int i;
328 int num_metric_names;
329
330 evlist__for_each_entry(evsel_list, counter) {
331 bool invalid = false;
332
333 leader = counter->leader;
334 if (!counter->metric_expr)
335 continue;
336 metric_events = counter->metric_events;
337 if (!metric_events) {
338 if (expr__find_other(counter->metric_expr, counter->name,
339 &metric_names, &num_metric_names) < 0)
340 continue;
341
342 metric_events = calloc(sizeof(struct evsel *),
343 num_metric_names + 1);
344 if (!metric_events)
345 return;
346 counter->metric_events = metric_events;
347 }
348
349 for (i = 0; i < num_metric_names; i++) {
350 found = false;
351 if (leader) {
352
353 for_each_group_member (oc, leader) {
354 if (!strcasecmp(oc->name, metric_names[i]) &&
355 !oc->collect_stat) {
356 found = true;
357 break;
358 }
359 }
360 }
361 if (!found) {
362
363 oc = perf_stat__find_event(evsel_list, metric_names[i]);
364 }
365 if (!oc) {
366
367 static char *printed;
368
369
370
371
372
373
374
375
376 if (!printed || strcasecmp(printed, metric_names[i])) {
377 fprintf(stderr,
378 "Add %s event to groups to get metric expression for %s\n",
379 metric_names[i],
380 counter->name);
381 printed = strdup(metric_names[i]);
382 }
383 invalid = true;
384 continue;
385 }
386 metric_events[i] = oc;
387 oc->collect_stat = true;
388 }
389 metric_events[i] = NULL;
390 free(metric_names);
391 if (invalid) {
392 free(metric_events);
393 counter->metric_events = NULL;
394 counter->metric_expr = NULL;
395 }
396 }
397 }
398
399 static double runtime_stat_avg(struct runtime_stat *st,
400 enum stat_type type, int ctx, int cpu)
401 {
402 struct saved_value *v;
403
404 v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
405 if (!v)
406 return 0.0;
407
408 return avg_stats(&v->stats);
409 }
410
411 static double runtime_stat_n(struct runtime_stat *st,
412 enum stat_type type, int ctx, int cpu)
413 {
414 struct saved_value *v;
415
416 v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
417 if (!v)
418 return 0.0;
419
420 return v->stats.n;
421 }
422
423 static void print_stalled_cycles_frontend(struct perf_stat_config *config,
424 int cpu,
425 struct evsel *evsel, double avg,
426 struct perf_stat_output_ctx *out,
427 struct runtime_stat *st)
428 {
429 double total, ratio = 0.0;
430 const char *color;
431 int ctx = evsel_context(evsel);
432
433 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
434
435 if (total)
436 ratio = avg / total * 100.0;
437
438 color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
439
440 if (ratio)
441 out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle",
442 ratio);
443 else
444 out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0);
445 }
446
447 static void print_stalled_cycles_backend(struct perf_stat_config *config,
448 int cpu,
449 struct evsel *evsel, double avg,
450 struct perf_stat_output_ctx *out,
451 struct runtime_stat *st)
452 {
453 double total, ratio = 0.0;
454 const char *color;
455 int ctx = evsel_context(evsel);
456
457 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
458
459 if (total)
460 ratio = avg / total * 100.0;
461
462 color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
463
464 out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
465 }
466
467 static void print_branch_misses(struct perf_stat_config *config,
468 int cpu,
469 struct evsel *evsel,
470 double avg,
471 struct perf_stat_output_ctx *out,
472 struct runtime_stat *st)
473 {
474 double total, ratio = 0.0;
475 const char *color;
476 int ctx = evsel_context(evsel);
477
478 total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);
479
480 if (total)
481 ratio = avg / total * 100.0;
482
483 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
484
485 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio);
486 }
487
488 static void print_l1_dcache_misses(struct perf_stat_config *config,
489 int cpu,
490 struct evsel *evsel,
491 double avg,
492 struct perf_stat_output_ctx *out,
493 struct runtime_stat *st)
494
495 {
496 double total, ratio = 0.0;
497 const char *color;
498 int ctx = evsel_context(evsel);
499
500 total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);
501
502 if (total)
503 ratio = avg / total * 100.0;
504
505 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
506
507 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
508 }
509
510 static void print_l1_icache_misses(struct perf_stat_config *config,
511 int cpu,
512 struct evsel *evsel,
513 double avg,
514 struct perf_stat_output_ctx *out,
515 struct runtime_stat *st)
516
517 {
518 double total, ratio = 0.0;
519 const char *color;
520 int ctx = evsel_context(evsel);
521
522 total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);
523
524 if (total)
525 ratio = avg / total * 100.0;
526
527 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
528 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
529 }
530
531 static void print_dtlb_cache_misses(struct perf_stat_config *config,
532 int cpu,
533 struct evsel *evsel,
534 double avg,
535 struct perf_stat_output_ctx *out,
536 struct runtime_stat *st)
537 {
538 double total, ratio = 0.0;
539 const char *color;
540 int ctx = evsel_context(evsel);
541
542 total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);
543
544 if (total)
545 ratio = avg / total * 100.0;
546
547 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
548 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
549 }
550
551 static void print_itlb_cache_misses(struct perf_stat_config *config,
552 int cpu,
553 struct evsel *evsel,
554 double avg,
555 struct perf_stat_output_ctx *out,
556 struct runtime_stat *st)
557 {
558 double total, ratio = 0.0;
559 const char *color;
560 int ctx = evsel_context(evsel);
561
562 total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);
563
564 if (total)
565 ratio = avg / total * 100.0;
566
567 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
568 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
569 }
570
571 static void print_ll_cache_misses(struct perf_stat_config *config,
572 int cpu,
573 struct evsel *evsel,
574 double avg,
575 struct perf_stat_output_ctx *out,
576 struct runtime_stat *st)
577 {
578 double total, ratio = 0.0;
579 const char *color;
580 int ctx = evsel_context(evsel);
581
582 total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);
583
584 if (total)
585 ratio = avg / total * 100.0;
586
587 color = get_ratio_color(GRC_CACHE_MISSES, ratio);
588 out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
589 }
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
/*
 * Clamp slightly negative values to zero: anything in [-0.02, 0) becomes
 * 0.0, every other value is returned unchanged.
 */
static double sanitize_val(double x)
{
	return (x < 0 && x >= -0.02) ? 0.0 : x;
}
639
640 static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
641 {
642 return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
643 }
644
645 static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
646 {
647 double bad_spec = 0;
648 double total_slots;
649 double total;
650
651 total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) -
652 runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) +
653 runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);
654
655 total_slots = td_total_slots(ctx, cpu, st);
656 if (total_slots)
657 bad_spec = total / total_slots;
658 return sanitize_val(bad_spec);
659 }
660
661 static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
662 {
663 double retiring = 0;
664 double total_slots = td_total_slots(ctx, cpu, st);
665 double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
666 ctx, cpu);
667
668 if (total_slots)
669 retiring = ret_slots / total_slots;
670 return retiring;
671 }
672
673 static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
674 {
675 double fe_bound = 0;
676 double total_slots = td_total_slots(ctx, cpu, st);
677 double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
678 ctx, cpu);
679
680 if (total_slots)
681 fe_bound = fetch_bub / total_slots;
682 return fe_bound;
683 }
684
/*
 * Remainder after subtracting the frontend-bound, bad-speculation and
 * retiring fractions from 1.0, passed through sanitize_val().  Returns 0
 * when those three fractions sum to zero.
 */
static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double accounted = (td_fe_bound(ctx, cpu, st) +
			    td_bad_spec(ctx, cpu, st) +
			    td_retiring(ctx, cpu, st));

	return accounted == 0 ? 0 : sanitize_val(1.0 - accounted);
}
694
695 static void print_smi_cost(struct perf_stat_config *config,
696 int cpu, struct evsel *evsel,
697 struct perf_stat_output_ctx *out,
698 struct runtime_stat *st)
699 {
700 double smi_num, aperf, cycles, cost = 0.0;
701 int ctx = evsel_context(evsel);
702 const char *color = NULL;
703
704 smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
705 aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
706 cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
707
708 if ((cycles == 0) || (aperf == 0))
709 return;
710
711 if (smi_num)
712 cost = (aperf - cycles) / aperf * 100.00;
713
714 if (cost > 10)
715 color = PERF_COLOR_RED;
716 out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
717 out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
718 }
719
720 static void generic_metric(struct perf_stat_config *config,
721 const char *metric_expr,
722 struct evsel **metric_events,
723 char *name,
724 const char *metric_name,
725 const char *metric_unit,
726 double avg,
727 int cpu,
728 struct perf_stat_output_ctx *out,
729 struct runtime_stat *st)
730 {
731 print_metric_t print_metric = out->print_metric;
732 struct parse_ctx pctx;
733 double ratio, scale;
734 int i;
735 void *ctxp = out->ctx;
736 char *n, *pn;
737
738 expr__ctx_init(&pctx);
739
740 expr__add_id(&pctx, name, avg);
741 for (i = 0; metric_events[i]; i++) {
742 struct saved_value *v;
743 struct stats *stats;
744 u64 metric_total = 0;
745
746 if (!strcmp(metric_events[i]->name, "duration_time")) {
747 stats = &walltime_nsecs_stats;
748 scale = 1e-9;
749 } else {
750 v = saved_value_lookup(metric_events[i], cpu, false,
751 STAT_NONE, 0, st);
752 if (!v)
753 break;
754 stats = &v->stats;
755 scale = 1.0;
756
757 if (v->metric_other)
758 metric_total = v->metric_total;
759 }
760
761 n = strdup(metric_events[i]->name);
762 if (!n)
763 return;
764
765
766
767
768
769 pn = strchr(n, ' ');
770 if (pn)
771 *pn = 0;
772
773 if (metric_total)
774 expr__add_id(&pctx, n, metric_total);
775 else
776 expr__add_id(&pctx, n, avg_stats(stats)*scale);
777 }
778
779 if (!metric_events[i]) {
780 const char *p = metric_expr;
781
782 if (expr__parse(&ratio, &pctx, &p) == 0) {
783 char *unit;
784 char metric_bf[64];
785
786 if (metric_unit && metric_name) {
787 if (perf_pmu__convert_scale(metric_unit,
788 &unit, &scale) >= 0) {
789 ratio *= scale;
790 }
791
792 scnprintf(metric_bf, sizeof(metric_bf),
793 "%s %s", unit, metric_name);
794 print_metric(config, ctxp, NULL, "%8.1f",
795 metric_bf, ratio);
796 } else {
797 print_metric(config, ctxp, NULL, "%8.1f",
798 metric_name ?
799 metric_name :
800 out->force_header ? name : "",
801 ratio);
802 }
803 } else {
804 print_metric(config, ctxp, NULL, NULL,
805 out->force_header ?
806 (metric_name ? metric_name : name) : "", 0);
807 }
808 } else
809 print_metric(config, ctxp, NULL, NULL, "", 0);
810
811 for (i = 1; i < pctx.num_ids; i++)
812 zfree(&pctx.ids[i].name);
813 }
814
815 void perf_stat__print_shadow_stats(struct perf_stat_config *config,
816 struct evsel *evsel,
817 double avg, int cpu,
818 struct perf_stat_output_ctx *out,
819 struct rblist *metric_events,
820 struct runtime_stat *st)
821 {
822 void *ctxp = out->ctx;
823 print_metric_t print_metric = out->print_metric;
824 double total, ratio = 0.0, total2;
825 const char *color = NULL;
826 int ctx = evsel_context(evsel);
827 struct metric_event *me;
828 int num = 1;
829
830 if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
831 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
832
833 if (total) {
834 ratio = avg / total;
835 print_metric(config, ctxp, NULL, "%7.2f ",
836 "insn per cycle", ratio);
837 } else {
838 print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
839 }
840
841 total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
842 ctx, cpu);
843
844 total = max(total, runtime_stat_avg(st,
845 STAT_STALLED_CYCLES_BACK,
846 ctx, cpu));
847
848 if (total && avg) {
849 out->new_line(config, ctxp);
850 ratio = total / avg;
851 print_metric(config, ctxp, NULL, "%7.2f ",
852 "stalled cycles per insn",
853 ratio);
854 }
855 } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
856 if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
857 print_branch_misses(config, cpu, evsel, avg, out, st);
858 else
859 print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
860 } else if (
861 evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
862 evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_L1D |
863 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
864 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
865
866 if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
867 print_l1_dcache_misses(config, cpu, evsel, avg, out, st);
868 else
869 print_metric(config, ctxp, NULL, NULL, "of all L1-dcache hits", 0);
870 } else if (
871 evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
872 evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_L1I |
873 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
874 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
875
876 if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
877 print_l1_icache_misses(config, cpu, evsel, avg, out, st);
878 else
879 print_metric(config, ctxp, NULL, NULL, "of all L1-icache hits", 0);
880 } else if (
881 evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
882 evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
883 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
884 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
885
886 if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
887 print_dtlb_cache_misses(config, cpu, evsel, avg, out, st);
888 else
889 print_metric(config, ctxp, NULL, NULL, "of all dTLB cache hits", 0);
890 } else if (
891 evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
892 evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
893 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
894 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
895
896 if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
897 print_itlb_cache_misses(config, cpu, evsel, avg, out, st);
898 else
899 print_metric(config, ctxp, NULL, NULL, "of all iTLB cache hits", 0);
900 } else if (
901 evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
902 evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_LL |
903 ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
904 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
905
906 if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
907 print_ll_cache_misses(config, cpu, evsel, avg, out, st);
908 else
909 print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0);
910 } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
911 total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);
912
913 if (total)
914 ratio = avg * 100 / total;
915
916 if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
917 print_metric(config, ctxp, NULL, "%8.3f %%",
918 "of all cache refs", ratio);
919 else
920 print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
921 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
922 print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st);
923 } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
924 print_stalled_cycles_backend(config, cpu, evsel, avg, out, st);
925 } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
926 total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
927
928 if (total) {
929 ratio = avg / total;
930 print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio);
931 } else {
932 print_metric(config, ctxp, NULL, NULL, "Ghz", 0);
933 }
934 } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
935 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
936
937 if (total)
938 print_metric(config, ctxp, NULL,
939 "%7.2f%%", "transactional cycles",
940 100.0 * (avg / total));
941 else
942 print_metric(config, ctxp, NULL, NULL, "transactional cycles",
943 0);
944 } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
945 total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
946 total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);
947
948 if (total2 < avg)
949 total2 = avg;
950 if (total)
951 print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles",
952 100.0 * ((total2-avg) / total));
953 else
954 print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
955 } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
956 total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
957 ctx, cpu);
958
959 if (avg)
960 ratio = total / avg;
961
962 if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
963 print_metric(config, ctxp, NULL, "%8.0f",
964 "cycles / transaction", ratio);
965 else
966 print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
967 0);
968 } else if (perf_stat_evsel__is(evsel, ELISION_START)) {
969 total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
970 ctx, cpu);
971
972 if (avg)
973 ratio = total / avg;
974
975 print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio);
976 } else if (perf_evsel__is_clock(evsel)) {
977 if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
978 print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
979 avg / (ratio * evsel->scale));
980 else
981 print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
982 } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
983 double fe_bound = td_fe_bound(ctx, cpu, st);
984
985 if (fe_bound > 0.2)
986 color = PERF_COLOR_RED;
987 print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
988 fe_bound * 100.);
989 } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
990 double retiring = td_retiring(ctx, cpu, st);
991
992 if (retiring > 0.7)
993 color = PERF_COLOR_GREEN;
994 print_metric(config, ctxp, color, "%8.1f%%", "retiring",
995 retiring * 100.);
996 } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
997 double bad_spec = td_bad_spec(ctx, cpu, st);
998
999 if (bad_spec > 0.1)
1000 color = PERF_COLOR_RED;
1001 print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
1002 bad_spec * 100.);
1003 } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
1004 double be_bound = td_be_bound(ctx, cpu, st);
1005 const char *name = "backend bound";
1006 static int have_recovery_bubbles = -1;
1007
1008
1009 if (have_recovery_bubbles < 0)
1010 have_recovery_bubbles = pmu_have_event("cpu",
1011 "topdown-recovery-bubbles");
1012 if (!have_recovery_bubbles)
1013 name = "backend bound/bad spec";
1014
1015 if (be_bound > 0.2)
1016 color = PERF_COLOR_RED;
1017 if (td_total_slots(ctx, cpu, st) > 0)
1018 print_metric(config, ctxp, color, "%8.1f%%", name,
1019 be_bound * 100.);
1020 else
1021 print_metric(config, ctxp, NULL, NULL, name, 0);
1022 } else if (evsel->metric_expr) {
1023 generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name,
1024 evsel->metric_name, NULL, avg, cpu, out, st);
1025 } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
1026 char unit = 'M';
1027 char unit_buf[10];
1028
1029 total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
1030
1031 if (total)
1032 ratio = 1000.0 * avg / total;
1033 if (ratio < 0.001) {
1034 ratio *= 1000;
1035 unit = 'K';
1036 }
1037 snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
1038 print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
1039 } else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
1040 print_smi_cost(config, cpu, evsel, out, st);
1041 } else {
1042 num = 0;
1043 }
1044
1045 if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
1046 struct metric_expr *mexp;
1047
1048 list_for_each_entry (mexp, &me->head, nd) {
1049 if (num++ > 0)
1050 out->new_line(config, ctxp);
1051 generic_metric(config, mexp->metric_expr, mexp->metric_events,
1052 evsel->name, mexp->metric_name,
1053 mexp->metric_unit, avg, cpu, out, st);
1054 }
1055 }
1056 if (num == 0)
1057 print_metric(config, ctxp, NULL, NULL, NULL, 0);
1058 }