This source file includes the following definitions:
- domain_is_valid
- is_physical_domain
- domain_needs_aggregation
- domain_name
- catalog_entry_domain_is_valid
- DEFINE_PER_CPU
- event_name
- event_desc
- event_long_desc
- event_fixed_portion_is_within
- event_end
- h_get_24x7_catalog_page_
- h_get_24x7_catalog_page
- event_fmt
- memdup_to_str
- device_show_string
- device_str_attr_create_
- device_str_attr_create
- event_to_attr
- event_to_desc_attr
- event_to_long_desc_attr
- event_data_to_attrs
- memord
- ev_uniq_ord
- event_uniq_add
- event_uniq_destroy
- catalog_event_len_validate
- create_events_from_catalog
- catalog_read
- domains_show
- init_24x7_request
- make_24x7_request
- add_event_to_24x7_request
- get_count_from_result
- single_24x7_request
- h_24x7_event_init
- h_24x7_get_value
- update_event_count
- h_24x7_event_read
- h_24x7_event_start
- h_24x7_event_stop
- h_24x7_event_add
- h_24x7_event_start_txn
- reset_txn
- h_24x7_event_commit_txn
- h_24x7_event_cancel_txn
- hv_24x7_init
1
2 /*
3  * Hypervisor-supplied "24x7" performance counter support: reads counters
4  * via the H_GET_24X7_DATA hcall and exposes them to perf as the
5  * "hv_24x7" PMU (powerpc pseries only).
6  */
7
8
9 #define pr_fmt(fmt) "hv-24x7: " fmt
10
11 #include <linux/perf_event.h>
12 #include <linux/rbtree.h>
13 #include <linux/module.h>
14 #include <linux/slab.h>
15 #include <linux/vmalloc.h>
16
17 #include <asm/cputhreads.h>
18 #include <asm/firmware.h>
19 #include <asm/hvcall.h>
20 #include <asm/io.h>
21 #include <linux/byteorder/generic.h>
22
23 #include "hv-24x7.h"
24 #include "hv-24x7-catalog.h"
25 #include "hv-common.h"
26
27
28 static int interface_version;
29
30
31 static bool aggregate_result_elements;
32
33 static bool domain_is_valid(unsigned domain)
34 {
35 switch (domain) {
36 #define DOMAIN(n, v, x, c) \
37 case HV_PERF_DOMAIN_##n: \
38 /* fall through */
39 #include "hv-24x7-domains.h"
40 #undef DOMAIN
41 return true;
42 default:
43 return false;
44 }
45 }
46
47 static bool is_physical_domain(unsigned domain)
48 {
49 switch (domain) {
50 #define DOMAIN(n, v, x, c) \
51 case HV_PERF_DOMAIN_##n: \
52 return c;
53 #include "hv-24x7-domains.h"
54 #undef DOMAIN
55 default:
56 return false;
57 }
58 }
59
60
61 static bool domain_needs_aggregation(unsigned int domain)
62 {
63 return aggregate_result_elements &&
64 (domain == HV_PERF_DOMAIN_PHYS_CORE ||
65 (domain >= HV_PERF_DOMAIN_VCPU_HOME_CORE &&
66 domain <= HV_PERF_DOMAIN_VCPU_REMOTE_NODE));
67 }
68
69 static const char *domain_name(unsigned domain)
70 {
71 if (!domain_is_valid(domain))
72 return NULL;
73
74 switch (domain) {
75 case HV_PERF_DOMAIN_PHYS_CHIP: return "Physical Chip";
76 case HV_PERF_DOMAIN_PHYS_CORE: return "Physical Core";
77 case HV_PERF_DOMAIN_VCPU_HOME_CORE: return "VCPU Home Core";
78 case HV_PERF_DOMAIN_VCPU_HOME_CHIP: return "VCPU Home Chip";
79 case HV_PERF_DOMAIN_VCPU_HOME_NODE: return "VCPU Home Node";
80 case HV_PERF_DOMAIN_VCPU_REMOTE_NODE: return "VCPU Remote Node";
81 }
82
83 WARN_ON_ONCE(domain);
84 return NULL;
85 }
86
87 static bool catalog_entry_domain_is_valid(unsigned domain)
88 {
89
90 if (interface_version == 1)
91 return is_physical_domain(domain);
92 else
93 return domain_is_valid(domain);
94 }
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112 /*
113  * Layout of the event config words, decoded by the helpers below:
114  * config[0:3] holds the performance domain, config[16:31] the chip,
115  * core or vcpu index (depending on the domain), config[32:63] the
116  * counter offset, and config1[0:15] the target LPAR.  The remaining
117  * bits are reserved and must be zero (this is checked at event init).
118  */
119
120
121 EVENT_DEFINE_RANGE_FORMAT(domain, config, 0, 3);
122
123 EVENT_DEFINE_RANGE_FORMAT(core, config, 16, 31);
124 EVENT_DEFINE_RANGE_FORMAT(chip, config, 16, 31);
125 EVENT_DEFINE_RANGE_FORMAT(vcpu, config, 16, 31);
126
127 EVENT_DEFINE_RANGE_FORMAT(offset, config, 32, 63);
128
129 EVENT_DEFINE_RANGE_FORMAT(lpar, config1, 0, 15);
130
131 EVENT_DEFINE_RANGE(reserved1, config, 4, 15);
132 EVENT_DEFINE_RANGE(reserved2, config1, 16, 63);
133 EVENT_DEFINE_RANGE(reserved3, config2, 0, 63);
134
135 static struct attribute *format_attrs[] = {
136 &format_attr_domain.attr,
137 &format_attr_offset.attr,
138 &format_attr_core.attr,
139 &format_attr_chip.attr,
140 &format_attr_vcpu.attr,
141 &format_attr_lpar.attr,
142 NULL,
143 };
144
145 static struct attribute_group format_group = {
146 .name = "format",
147 .attrs = format_attrs,
148 };
149
150 static struct attribute_group event_group = {
151 .name = "events",
152
153 };
154
155 static struct attribute_group event_desc_group = {
156 .name = "event_descs",
157
158 };
159
160 static struct attribute_group event_long_desc_group = {
161 .name = "event_long_descs",
162
163 };
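/*
 * Usage sketch (values are illustrative, not from a real catalog): the
 * groups above, together with the "interface" group defined further down,
 * are registered as the attr_groups of the "hv_24x7" PMU, so perf core
 * exposes them under /sys/bus/event_source/devices/hv_24x7/.  A raw event
 * can then be requested with the format fields alone, e.g.
 *
 *   perf stat -C 0 -e 'hv_24x7/domain=2,offset=8,core=1,lpar=0x0/' sleep 1
 */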
164
165 static struct kmem_cache *hv_page_cache;
166
167 DEFINE_PER_CPU(int, hv_24x7_txn_flags);
168 DEFINE_PER_CPU(int, hv_24x7_txn_err);
169
170 struct hv_24x7_hw {
171 struct perf_event *events[255];
172 };
173
174 DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);
175
176 /*
177  * request_buffer and result_buffer are not required to be 4k aligned,
178  * but are not allowed to cross any 4k boundary. Aligning them to 4k is
179  * the simplest way to ensure that.
180  */
181 #define H24x7_DATA_BUFFER_SIZE 4096
182 DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
183 DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
184
185 static unsigned int max_num_requests(int interface_version)
186 {
187 return (H24x7_DATA_BUFFER_SIZE - sizeof(struct hv_24x7_request_buffer))
188 / H24x7_REQUEST_SIZE(interface_version);
189 }
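/*
 * Worked example with hypothetical sizes (the real ones depend on the
 * structure layouts in hv-24x7.h): if the request-buffer header occupies
 * 32 bytes and H24x7_REQUEST_SIZE() is 64 for the running interface
 * version, then (4096 - 32) / 64 = 63 requests fit in one buffer.
 */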
190
191 static char *event_name(struct hv_24x7_event_data *ev, int *len)
192 {
193 *len = be16_to_cpu(ev->event_name_len) - 2;
194 return (char *)ev->remainder;
195 }
196
197 static char *event_desc(struct hv_24x7_event_data *ev, int *len)
198 {
199 unsigned nl = be16_to_cpu(ev->event_name_len);
200 __be16 *desc_len = (__be16 *)(ev->remainder + nl - 2);
201
202 *len = be16_to_cpu(*desc_len) - 2;
203 return (char *)ev->remainder + nl;
204 }
205
206 static char *event_long_desc(struct hv_24x7_event_data *ev, int *len)
207 {
208 unsigned nl = be16_to_cpu(ev->event_name_len);
209 __be16 *desc_len_ = (__be16 *)(ev->remainder + nl - 2);
210 unsigned desc_len = be16_to_cpu(*desc_len_);
211 __be16 *long_desc_len = (__be16 *)(ev->remainder + nl + desc_len - 2);
212
213 *len = be16_to_cpu(*long_desc_len) - 2;
214 return (char *)ev->remainder + nl + desc_len;
215 }
216
217 static bool event_fixed_portion_is_within(struct hv_24x7_event_data *ev,
218 void *end)
219 {
220 void *start = ev;
221
222 return (start + offsetof(struct hv_24x7_event_data, remainder)) < end;
223 }
224
225 /*
226  * Things we don't check:
227  *  - padding for desc, name, and long/detailed desc is required to be '\0'
228  *    bytes.
229  *
230  * Return NULL if we pass end.
231  * Otherwise return the address of the byte just following the event.
232  */
233 static void *event_end(struct hv_24x7_event_data *ev, void *end)
234 {
235 void *start = ev;
236 __be16 *dl_, *ldl_;
237 unsigned dl, ldl;
238 unsigned nl = be16_to_cpu(ev->event_name_len);
239
240 if (nl < 2) {
241 pr_debug("%s: name length too short: %d", __func__, nl);
242 return NULL;
243 }
244
245 if (start + nl > end) {
246 pr_debug("%s: start=%p + nl=%u > end=%p",
247 __func__, start, nl, end);
248 return NULL;
249 }
250
251 dl_ = (__be16 *)(ev->remainder + nl - 2);
252 if (!IS_ALIGNED((uintptr_t)dl_, 2))
253 pr_warn("desc len not aligned %p", dl_);
254 dl = be16_to_cpu(*dl_);
255 if (dl < 2) {
256 pr_debug("%s: desc len too short: %d", __func__, dl);
257 return NULL;
258 }
259
260 if (start + nl + dl > end) {
261 pr_debug("%s: (start=%p + nl=%u + dl=%u)=%p > end=%p",
262 __func__, start, nl, dl, start + nl + dl, end);
263 return NULL;
264 }
265
266 ldl_ = (__be16 *)(ev->remainder + nl + dl - 2);
267 if (!IS_ALIGNED((uintptr_t)ldl_, 2))
268 pr_warn("long desc len not aligned %p", ldl_);
269 ldl = be16_to_cpu(*ldl_);
270 if (ldl < 2) {
271 pr_debug("%s: long desc len too short (ldl=%u)",
272 __func__, ldl);
273 return NULL;
274 }
275
276 if (start + nl + dl + ldl > end) {
277 pr_debug("%s: start=%p + nl=%u + dl=%u + ldl=%u > end=%p",
278 __func__, start, nl, dl, ldl, end);
279 return NULL;
280 }
281
282 return start + nl + dl + ldl;
283 }
284
285 static long h_get_24x7_catalog_page_(unsigned long phys_4096,
286 unsigned long version, unsigned long index)
287 {
288 pr_devel("h_get_24x7_catalog_page(0x%lx, %lu, %lu)",
289 phys_4096, version, index);
290
291 WARN_ON(!IS_ALIGNED(phys_4096, 4096));
292
293 return plpar_hcall_norets(H_GET_24X7_CATALOG_PAGE,
294 phys_4096, version, index);
295 }
296
297 static long h_get_24x7_catalog_page(char page[], u64 version, u32 index)
298 {
299 return h_get_24x7_catalog_page_(virt_to_phys(page),
300 version, index);
301 }
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325 /*
326  * Each event found in the catalog gets a sysfs entry, and its value is
327  * formatted here based on the event's domain.
328  *
329  * For chip-domain events the domain is fixed, so it is emitted as a
330  * number and only the chip index is left for the user ("chip=?").
331  * Core-domain events may be counted either in the physical core or in
332  * one of the virtual-CPU domains, so the domain itself is also left as
333  * "?".  For the remaining (virtual CPU) domains the domain, vcpu index
334  * and lpar are all left as "?".  Every "?" in the generated string must
335  * be filled in on the perf command line.
336  */
337
338
339 static char *event_fmt(struct hv_24x7_event_data *event, unsigned domain)
340 {
341 const char *sindex;
342 const char *lpar;
343 const char *domain_str;
344 char buf[8];
345
346 switch (domain) {
347 case HV_PERF_DOMAIN_PHYS_CHIP:
348 snprintf(buf, sizeof(buf), "%d", domain);
349 domain_str = buf;
350 lpar = "0x0";
351 sindex = "chip";
352 break;
353 case HV_PERF_DOMAIN_PHYS_CORE:
354 domain_str = "?";
355 lpar = "0x0";
356 sindex = "core";
357 break;
358 default:
359 domain_str = "?";
360 lpar = "?";
361 sindex = "vcpu";
362 }
363
364 return kasprintf(GFP_KERNEL,
365 "domain=%s,offset=0x%x,%s=?,lpar=%s",
366 domain_str,
367 be16_to_cpu(event->event_counter_offs) +
368 be16_to_cpu(event->event_group_record_offs),
369 sindex,
370 lpar);
371 }
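/*
 * For example (offset illustrative): a PHYS_CORE catalog event whose
 * counter offset and group record offset sum to 0x458 produces the string
 *
 *   "domain=?,offset=0x458,core=?,lpar=0x0"
 *
 * and each "?" must be supplied by the user on the perf command line.
 */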
372
373
374 static char *memdup_to_str(char *maybe_str, int max_len, gfp_t gfp)
375 {
376 return kasprintf(gfp, "%.*s", max_len, maybe_str);
377 }
378
379 static ssize_t device_show_string(struct device *dev,
380 struct device_attribute *attr, char *buf)
381 {
382 struct dev_ext_attribute *d;
383
384 d = container_of(attr, struct dev_ext_attribute, attr);
385
386 return sprintf(buf, "%s\n", (char *)d->var);
387 }
388
389 static struct attribute *device_str_attr_create_(char *name, char *str)
390 {
391 struct dev_ext_attribute *attr = kzalloc(sizeof(*attr), GFP_KERNEL);
392
393 if (!attr)
394 return NULL;
395
396 sysfs_attr_init(&attr->attr.attr);
397
398 attr->var = str;
399 attr->attr.attr.name = name;
400 attr->attr.attr.mode = 0444;
401 attr->attr.show = device_show_string;
402
403 return &attr->attr.attr;
404 }
405
406 /*
407  * Allocate and initialize strings representing event attributes.
408  *
409  * NOTE: The strings allocated here are never freed; the created
410  * attributes (and the strings they reference) stay registered with
411  * sysfs for the lifetime of the PMU.
412  */
413
414
415 static struct attribute *device_str_attr_create(char *name, int name_max,
416 int name_nonce,
417 char *str, size_t str_max)
418 {
419 char *n;
420 char *s = memdup_to_str(str, str_max, GFP_KERNEL);
421 struct attribute *a;
422
423 if (!s)
424 return NULL;
425
426 if (!name_nonce)
427 n = kasprintf(GFP_KERNEL, "%.*s", name_max, name);
428 else
429 n = kasprintf(GFP_KERNEL, "%.*s__%d", name_max, name,
430 name_nonce);
431 if (!n)
432 goto out_s;
433
434 a = device_str_attr_create_(n, s);
435 if (!a)
436 goto out_n;
437
438 return a;
439 out_n:
440 kfree(n);
441 out_s:
442 kfree(s);
443 return NULL;
444 }
445
446 static struct attribute *event_to_attr(unsigned ix,
447 struct hv_24x7_event_data *event,
448 unsigned domain,
449 int nonce)
450 {
451 int event_name_len;
452 char *ev_name, *a_ev_name, *val;
453 struct attribute *attr;
454
455 if (!domain_is_valid(domain)) {
456 pr_warn("catalog event %u has invalid domain %u\n",
457 ix, domain);
458 return NULL;
459 }
460
461 val = event_fmt(event, domain);
462 if (!val)
463 return NULL;
464
465 ev_name = event_name(event, &event_name_len);
466 if (!nonce)
467 a_ev_name = kasprintf(GFP_KERNEL, "%.*s",
468 (int)event_name_len, ev_name);
469 else
470 a_ev_name = kasprintf(GFP_KERNEL, "%.*s__%d",
471 (int)event_name_len, ev_name, nonce);
472
473 if (!a_ev_name)
474 goto out_val;
475
476 attr = device_str_attr_create_(a_ev_name, val);
477 if (!attr)
478 goto out_name;
479
480 return attr;
481 out_name:
482 kfree(a_ev_name);
483 out_val:
484 kfree(val);
485 return NULL;
486 }
487
488 static struct attribute *event_to_desc_attr(struct hv_24x7_event_data *event,
489 int nonce)
490 {
491 int nl, dl;
492 char *name = event_name(event, &nl);
493 char *desc = event_desc(event, &dl);
494
495 /* If there is no description, don't create the sysfs file */
496 if (!dl)
497 return NULL;
498
499 return device_str_attr_create(name, nl, nonce, desc, dl);
500 }
501
502 static struct attribute *
503 event_to_long_desc_attr(struct hv_24x7_event_data *event, int nonce)
504 {
505 int nl, dl;
506 char *name = event_name(event, &nl);
507 char *desc = event_long_desc(event, &dl);
508
509 /* If there is no long description, don't create the sysfs file */
510 if (!dl)
511 return NULL;
512
513 return device_str_attr_create(name, nl, nonce, desc, dl);
514 }
515
516 static int event_data_to_attrs(unsigned ix, struct attribute **attrs,
517 struct hv_24x7_event_data *event, int nonce)
518 {
519 *attrs = event_to_attr(ix, event, event->domain, nonce);
520 if (!*attrs)
521 return -1;
522
523 return 0;
524 }
525
526
527 struct event_uniq {
528 struct rb_node node;
529 const char *name;
530 int nl;
531 unsigned ct;
532 unsigned domain;
533 };
534
535 static int memord(const void *d1, size_t s1, const void *d2, size_t s2)
536 {
537 if (s1 < s2)
538 return 1;
539 if (s1 > s2)
540 return -1;
541
542 return memcmp(d1, d2, s1);
543 }
544
545 static int ev_uniq_ord(const void *v1, size_t s1, unsigned d1, const void *v2,
546 size_t s2, unsigned d2)
547 {
548 int r = memord(v1, s1, v2, s2);
549
550 if (r)
551 return r;
552 if (d1 > d2)
553 return 1;
554 if (d2 > d1)
555 return -1;
556 return 0;
557 }
558
559 static int event_uniq_add(struct rb_root *root, const char *name, int nl,
560 unsigned domain)
561 {
562 struct rb_node **new = &(root->rb_node), *parent = NULL;
563 struct event_uniq *data;
564
565
566 while (*new) {
567 struct event_uniq *it;
568 int result;
569
570 it = rb_entry(*new, struct event_uniq, node);
571 result = ev_uniq_ord(name, nl, domain, it->name, it->nl,
572 it->domain);
573
574 parent = *new;
575 if (result < 0)
576 new = &((*new)->rb_left);
577 else if (result > 0)
578 new = &((*new)->rb_right);
579 else {
580 it->ct++;
581 pr_info("found a duplicate event %.*s, ct=%u\n", nl,
582 name, it->ct);
583 return it->ct;
584 }
585 }
586
587 data = kmalloc(sizeof(*data), GFP_KERNEL);
588 if (!data)
589 return -ENOMEM;
590
591 *data = (struct event_uniq) {
592 .name = name,
593 .nl = nl,
594 .ct = 0,
595 .domain = domain,
596 };
597
598
599 rb_link_node(&data->node, parent, new);
600 rb_insert_color(&data->node, root);
601
602
603 return 0;
604 }
605
606 static void event_uniq_destroy(struct rb_root *root)
607 {
608 /*
609  * The name strings pointed to here live in the big catalog data buffer
610  * and are freed separately, so only the rbtree nodes are freed below.
611  */
612 struct event_uniq *pos, *n;
613
614 rbtree_postorder_for_each_entry_safe(pos, n, root, node)
615 kfree(pos);
616 }
617
618 /*
619  * Ensure the event structure's sizes are self consistent and don't
620  * cause us to read outside of the event.
621  *
622  * On success, return the event length in bytes.
623  * Otherwise return -1 (and print as appropriate).
624  */
625
626 static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event,
627 size_t event_idx,
628 size_t event_data_bytes,
629 size_t event_entry_count,
630 size_t offset, void *end)
631 {
632 ssize_t ev_len;
633 void *ev_end, *calc_ev_end;
634
635 if (offset >= event_data_bytes)
636 return -1;
637
638 if (event_idx >= event_entry_count) {
639 pr_devel("catalog event data has %zu bytes of padding after last event\n",
640 event_data_bytes - offset);
641 return -1;
642 }
643
644 if (!event_fixed_portion_is_within(event, end)) {
645 pr_warn("event %zu fixed portion is not within range\n",
646 event_idx);
647 return -1;
648 }
649
650 ev_len = be16_to_cpu(event->length);
651
652 if (ev_len % 16)
653 pr_info("event %zu has length %zu not divisible by 16: event=%pK\n",
654 event_idx, ev_len, event);
655
656 ev_end = (__u8 *)event + ev_len;
657 if (ev_end > end) {
658 pr_warn("event %zu has .length=%zu, ends after buffer end: ev_end=%pK > end=%pK, offset=%zu\n",
659 event_idx, ev_len, ev_end, end,
660 offset);
661 return -1;
662 }
663
664 calc_ev_end = event_end(event, end);
665 if (!calc_ev_end) {
666 pr_warn("event %zu has a calculated length which exceeds buffer length %zu: event=%pK end=%pK, offset=%zu\n",
667 event_idx, event_data_bytes, event, end,
668 offset);
669 return -1;
670 }
671
672 if (calc_ev_end > ev_end) {
673 pr_warn("event %zu exceeds it's own length: event=%pK, end=%pK, offset=%zu, calc_ev_end=%pK\n",
674 event_idx, event, ev_end, offset, calc_ev_end);
675 return -1;
676 }
677
678 return ev_len;
679 }
680
681 #define MAX_4K (SIZE_MAX / 4096)
682
683 static int create_events_from_catalog(struct attribute ***events_,
684 struct attribute ***event_descs_,
685 struct attribute ***event_long_descs_)
686 {
687 long hret;
688 size_t catalog_len, catalog_page_len, event_entry_count,
689 event_data_len, event_data_offs,
690 event_data_bytes, junk_events, event_idx, event_attr_ct, i,
691 attr_max, event_idx_last, desc_ct, long_desc_ct;
692 ssize_t ct, ev_len;
693 uint64_t catalog_version_num;
694 struct attribute **events, **event_descs, **event_long_descs;
695 struct hv_24x7_catalog_page_0 *page_0 =
696 kmem_cache_alloc(hv_page_cache, GFP_KERNEL);
697 void *page = page_0;
698 void *event_data, *end;
699 struct hv_24x7_event_data *event;
700 struct rb_root ev_uniq = RB_ROOT;
701 int ret = 0;
702
703 if (!page) {
704 ret = -ENOMEM;
705 goto e_out;
706 }
707
708 hret = h_get_24x7_catalog_page(page, 0, 0);
709 if (hret) {
710 ret = -EIO;
711 goto e_free;
712 }
713
714 catalog_version_num = be64_to_cpu(page_0->version);
715 catalog_page_len = be32_to_cpu(page_0->length);
716
717 if (MAX_4K < catalog_page_len) {
718 pr_err("invalid page count: %zu\n", catalog_page_len);
719 ret = -EIO;
720 goto e_free;
721 }
722
723 catalog_len = catalog_page_len * 4096;
724
725 event_entry_count = be16_to_cpu(page_0->event_entry_count);
726 event_data_offs = be16_to_cpu(page_0->event_data_offs);
727 event_data_len = be16_to_cpu(page_0->event_data_len);
728
729 pr_devel("cv %llu cl %zu eec %zu edo %zu edl %zu\n",
730 catalog_version_num, catalog_len,
731 event_entry_count, event_data_offs, event_data_len);
732
733 if ((MAX_4K < event_data_len)
734 || (MAX_4K < event_data_offs)
735 || (MAX_4K - event_data_offs < event_data_len)) {
736 pr_err("invalid event data offs %zu and/or len %zu\n",
737 event_data_offs, event_data_len);
738 ret = -EIO;
739 goto e_free;
740 }
741
742 if ((event_data_offs + event_data_len) > catalog_page_len) {
743 pr_err("event data %zu-%zu does not fit inside catalog 0-%zu\n",
744 event_data_offs,
745 event_data_offs + event_data_len,
746 catalog_page_len);
747 ret = -EIO;
748 goto e_free;
749 }
750
751 if (SIZE_MAX - 1 < event_entry_count) {
752 pr_err("event_entry_count %zu is invalid\n", event_entry_count);
753 ret = -EIO;
754 goto e_free;
755 }
756
757 event_data_bytes = event_data_len * 4096;
758
759 /*
760  * event data can span several pages, and events can cross between
761  * these pages. Use vmalloc to make this easier.
762  */
763 event_data = vmalloc(event_data_bytes);
764 if (!event_data) {
765 pr_err("could not allocate event data\n");
766 ret = -ENOMEM;
767 goto e_free;
768 }
769
770 end = event_data + event_data_bytes;
771
772 /*
773  * using vmalloc_to_phys() like this only works if PAGE_SIZE is
774  * divisible by 4096
775  */
776 BUILD_BUG_ON(PAGE_SIZE % 4096);
777
778 for (i = 0; i < event_data_len; i++) {
779 hret = h_get_24x7_catalog_page_(
780 vmalloc_to_phys(event_data + i * 4096),
781 catalog_version_num,
782 i + event_data_offs);
783 if (hret) {
784 pr_err("Failed to get event data in page %zu: rc=%ld\n",
785 i + event_data_offs, hret);
786 ret = -EIO;
787 goto e_event_data;
788 }
789 }
790
791 /*
792  * Scan the catalog to count the valid entries (so we know how many
793  * attributes to allocate) and validate the event lengths as we go.
794  */
795 for (junk_events = 0, event = event_data, event_idx = 0, attr_max = 0;
796 ;
797 event_idx++, event = (void *)event + ev_len) {
798 size_t offset = (void *)event - (void *)event_data;
799 char *name;
800 int nl;
801
802 ev_len = catalog_event_len_validate(event, event_idx,
803 event_data_bytes,
804 event_entry_count,
805 offset, end);
806 if (ev_len < 0)
807 break;
808
809 name = event_name(event, &nl);
810
811 if (event->event_group_record_len == 0) {
812 pr_devel("invalid event %zu (%.*s): group_record_len == 0, skipping\n",
813 event_idx, nl, name);
814 junk_events++;
815 continue;
816 }
817
818 if (!catalog_entry_domain_is_valid(event->domain)) {
819 pr_info("event %zu (%.*s) has invalid domain %d\n",
820 event_idx, nl, name, event->domain);
821 junk_events++;
822 continue;
823 }
824
825 attr_max++;
826 }
827
828 event_idx_last = event_idx;
829 if (event_idx_last != event_entry_count)
830 pr_warn("event buffer ended before listed # of events were parsed (got %zu, wanted %zu, junk %zu)\n",
831 event_idx_last, event_entry_count, junk_events);
832
833 events = kmalloc_array(attr_max + 1, sizeof(*events), GFP_KERNEL);
834 if (!events) {
835 ret = -ENOMEM;
836 goto e_event_data;
837 }
838
839 event_descs = kmalloc_array(event_idx + 1, sizeof(*event_descs),
840 GFP_KERNEL);
841 if (!event_descs) {
842 ret = -ENOMEM;
843 goto e_event_attrs;
844 }
845
846 event_long_descs = kmalloc_array(event_idx + 1,
847 sizeof(*event_long_descs), GFP_KERNEL);
848 if (!event_long_descs) {
849 ret = -ENOMEM;
850 goto e_event_descs;
851 }
852
853 /* Iterate over the catalog again, filling in the attribute arrays */
854 for (junk_events = 0, event_attr_ct = 0, desc_ct = 0, long_desc_ct = 0,
855 event = event_data, event_idx = 0;
856 event_idx < event_idx_last;
857 event_idx++, ev_len = be16_to_cpu(event->length),
858 event = (void *)event + ev_len) {
859 char *name;
860 int nl;
861 int nonce;
862
863 /*
864  * Skip events already flagged as invalid in the counting pass above.
865  */
866 if (event->event_group_record_len == 0)
867 continue;
868 if (!catalog_entry_domain_is_valid(event->domain))
869 continue;
870
871 name = event_name(event, &nl);
872 nonce = event_uniq_add(&ev_uniq, name, nl, event->domain);
873 ct = event_data_to_attrs(event_idx, events + event_attr_ct,
874 event, nonce);
875 if (ct < 0) {
876 pr_warn("event %zu (%.*s) creation failure, skipping\n",
877 event_idx, nl, name);
878 junk_events++;
879 } else {
880 event_attr_ct++;
881 event_descs[desc_ct] = event_to_desc_attr(event, nonce);
882 if (event_descs[desc_ct])
883 desc_ct++;
884 event_long_descs[long_desc_ct] =
885 event_to_long_desc_attr(event, nonce);
886 if (event_long_descs[long_desc_ct])
887 long_desc_ct++;
888 }
889 }
890
891 pr_info("read %zu catalog entries, created %zu event attrs (%zu failures), %zu descs\n",
892 event_idx, event_attr_ct, junk_events, desc_ct);
893
894 events[event_attr_ct] = NULL;
895 event_descs[desc_ct] = NULL;
896 event_long_descs[long_desc_ct] = NULL;
897
898 event_uniq_destroy(&ev_uniq);
899 vfree(event_data);
900 kmem_cache_free(hv_page_cache, page);
901
902 *events_ = events;
903 *event_descs_ = event_descs;
904 *event_long_descs_ = event_long_descs;
905 return 0;
906
907 e_event_descs:
908 kfree(event_descs);
909 e_event_attrs:
910 kfree(events);
911 e_event_data:
912 vfree(event_data);
913 e_free:
914 kmem_cache_free(hv_page_cache, page);
915 e_out:
916 *events_ = NULL;
917 *event_descs_ = NULL;
918 *event_long_descs_ = NULL;
919 return ret;
920 }
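/*
 * The three attribute arrays built above become the contents of the
 * "events", "event_descs" and "event_long_descs" sysfs directories once
 * event_group and friends (defined near the top of this file) are
 * registered with the PMU in hv_24x7_init().
 */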
921
922 static ssize_t catalog_read(struct file *filp, struct kobject *kobj,
923 struct bin_attribute *bin_attr, char *buf,
924 loff_t offset, size_t count)
925 {
926 long hret;
927 ssize_t ret = 0;
928 size_t catalog_len = 0, catalog_page_len = 0;
929 loff_t page_offset = 0;
930 loff_t offset_in_page;
931 size_t copy_len;
932 uint64_t catalog_version_num = 0;
933 void *page = kmem_cache_alloc(hv_page_cache, GFP_USER);
934 struct hv_24x7_catalog_page_0 *page_0 = page;
935
936 if (!page)
937 return -ENOMEM;
938
939 hret = h_get_24x7_catalog_page(page, 0, 0);
940 if (hret) {
941 ret = -EIO;
942 goto e_free;
943 }
944
945 catalog_version_num = be64_to_cpu(page_0->version);
946 catalog_page_len = be32_to_cpu(page_0->length);
947 catalog_len = catalog_page_len * 4096;
948
949 page_offset = offset / 4096;
950 offset_in_page = offset % 4096;
951
952 if (page_offset >= catalog_page_len)
953 goto e_free;
954
955 if (page_offset != 0) {
956 hret = h_get_24x7_catalog_page(page, catalog_version_num,
957 page_offset);
958 if (hret) {
959 ret = -EIO;
960 goto e_free;
961 }
962 }
963
964 copy_len = 4096 - offset_in_page;
965 if (copy_len > count)
966 copy_len = count;
967
968 memcpy(buf, page+offset_in_page, copy_len);
969 ret = copy_len;
970
971 e_free:
972 if (hret)
973 pr_err("h_get_24x7_catalog_page(ver=%lld, page=%lld) failed:"
974 " rc=%ld\n",
975 catalog_version_num, page_offset, hret);
976 kmem_cache_free(hv_page_cache, page);
977
978 pr_devel("catalog_read: offset=%lld(%lld) count=%zu "
979 "catalog_len=%zu(%zu) => %zd\n", offset, page_offset,
980 count, catalog_len, catalog_page_len, ret);
981
982 return ret;
983 }
984
985 static ssize_t domains_show(struct device *dev, struct device_attribute *attr,
986 char *page)
987 {
988 int d, n, count = 0;
989 const char *str;
990
991 for (d = 0; d < HV_PERF_DOMAIN_MAX; d++) {
992 str = domain_name(d);
993 if (!str)
994 continue;
995
996 n = sprintf(page, "%d: %s\n", d, str);
997 if (n < 0)
998 break;
999
1000 count += n;
1001 page += n;
1002 }
1003 return count;
1004 }
1005
1006 #define PAGE_0_ATTR(_name, _fmt, _expr) \
1007 static ssize_t _name##_show(struct device *dev, \
1008 struct device_attribute *dev_attr, \
1009 char *buf) \
1010 { \
1011 long hret; \
1012 ssize_t ret = 0; \
1013 void *page = kmem_cache_alloc(hv_page_cache, GFP_USER); \
1014 struct hv_24x7_catalog_page_0 *page_0 = page; \
1015 if (!page) \
1016 return -ENOMEM; \
1017 hret = h_get_24x7_catalog_page(page, 0, 0); \
1018 if (hret) { \
1019 ret = -EIO; \
1020 goto e_free; \
1021 } \
1022 ret = sprintf(buf, _fmt, _expr); \
1023 e_free: \
1024 kmem_cache_free(hv_page_cache, page); \
1025 return ret; \
1026 } \
1027 static DEVICE_ATTR_RO(_name)
1028
1029 PAGE_0_ATTR(catalog_version, "%lld\n",
1030 (unsigned long long)be64_to_cpu(page_0->version));
1031 PAGE_0_ATTR(catalog_len, "%lld\n",
1032 (unsigned long long)be32_to_cpu(page_0->length) * 4096);
1033 static BIN_ATTR_RO(catalog, 0);
1034 static DEVICE_ATTR_RO(domains);
1035
1036 static struct bin_attribute *if_bin_attrs[] = {
1037 &bin_attr_catalog,
1038 NULL,
1039 };
1040
1041 static struct attribute *if_attrs[] = {
1042 &dev_attr_catalog_len.attr,
1043 &dev_attr_catalog_version.attr,
1044 &dev_attr_domains.attr,
1045 NULL,
1046 };
1047
1048 static struct attribute_group if_group = {
1049 .name = "interface",
1050 .bin_attrs = if_bin_attrs,
1051 .attrs = if_attrs,
1052 };
1053
1054 static const struct attribute_group *attr_groups[] = {
1055 &format_group,
1056 &event_group,
1057 &event_desc_group,
1058 &event_long_desc_group,
1059 &if_group,
1060 NULL,
1061 };
1062
1063 /*
1064  * Start building a new H_GET_24X7_DATA request.
1065  */
1066 static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer,
1067 struct hv_24x7_data_result_buffer *result_buffer)
1068 {
1069
1070 memset(request_buffer, 0, H24x7_DATA_BUFFER_SIZE);
1071 memset(result_buffer, 0, H24x7_DATA_BUFFER_SIZE);
1072
1073 request_buffer->interface_version = interface_version;
1074 /* the memset above already set request_buffer->num_requests to 0 */
1075 }
1076
1077 /*
1078  * Commit (i.e. perform) the H_GET_24X7_DATA hcall using the data
1079  * collected by init_24x7_request() and add_event_to_24x7_request().
1080  */
1081 static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer,
1082 struct hv_24x7_data_result_buffer *result_buffer)
1083 {
1084 long ret;
1085
1086 /*
1087  * NOTE: Due to the variable number of array elements in the request and
1088  * result buffers, sizeof() is not reliable. Use the actual allocated
1089  * buffer size, H24x7_DATA_BUFFER_SIZE, for both hcall arguments.
1090  */
1091 ret = plpar_hcall_norets(H_GET_24X7_DATA,
1092 virt_to_phys(request_buffer), H24x7_DATA_BUFFER_SIZE,
1093 virt_to_phys(result_buffer), H24x7_DATA_BUFFER_SIZE);
1094
1095 if (ret) {
1096 struct hv_24x7_request *req;
1097
1098 req = request_buffer->requests;
1099 pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => ret 0x%lx (%ld) detail=0x%x failing ix=%x\n",
1100 req->performance_domain, req->data_offset,
1101 req->starting_ix, req->starting_lpar_ix,
1102 ret, ret, result_buffer->detailed_rc,
1103 result_buffer->failing_request_ix);
1104 return -EIO;
1105 }
1106
1107 return 0;
1108 }
1109
1110 /*
1111  * Add the given @event to the next slot in the 24x7 request_buffer.
1112  *
1113  * Note that the H_GET_24X7_DATA hcall allows reading several counters'
1114  * values in a single hcall. We expect the caller to add events to the
1115  * request buffer one by one, make the hcall and process the results.
1116  */
1117 static int add_event_to_24x7_request(struct perf_event *event,
1118 struct hv_24x7_request_buffer *request_buffer)
1119 {
1120 u16 idx;
1121 int i;
1122 size_t req_size;
1123 struct hv_24x7_request *req;
1124
1125 if (request_buffer->num_requests >=
1126 max_num_requests(request_buffer->interface_version)) {
1127 pr_devel("Too many requests for 24x7 HCALL %d\n",
1128 request_buffer->num_requests);
1129 return -EINVAL;
1130 }
1131
1132 switch (event_get_domain(event)) {
1133 case HV_PERF_DOMAIN_PHYS_CHIP:
1134 idx = event_get_chip(event);
1135 break;
1136 case HV_PERF_DOMAIN_PHYS_CORE:
1137 idx = event_get_core(event);
1138 break;
1139 default:
1140 idx = event_get_vcpu(event);
1141 }
1142
1143 req_size = H24x7_REQUEST_SIZE(request_buffer->interface_version);
1144
1145 i = request_buffer->num_requests++;
1146 req = (void *) request_buffer->requests + i * req_size;
1147
1148 req->performance_domain = event_get_domain(event);
1149 req->data_size = cpu_to_be16(8);
1150 req->data_offset = cpu_to_be32(event_get_offset(event));
1151 req->starting_lpar_ix = cpu_to_be16(event_get_lpar(event));
1152 req->max_num_lpars = cpu_to_be16(1);
1153 req->starting_ix = cpu_to_be16(idx);
1154 req->max_ix = cpu_to_be16(1);
1155
1156 if (request_buffer->interface_version > 1) {
1157 if (domain_needs_aggregation(req->performance_domain))
1158 req->max_num_thread_groups = -1;
1159 else if (req->performance_domain != HV_PERF_DOMAIN_PHYS_CHIP) {
1160 req->starting_thread_group_ix = idx % 2;
1161 req->max_num_thread_groups = 1;
1162 }
1163 }
1164
1165 return 0;
1166 }
1167
1168 /**
1169  * get_count_from_result - get event count from the given result
1170  *
1171  * @event:  Event associated with @res.
1172  * @resb:   Result buffer containing @res.
1173  * @res:    Result to work on.
1174  * @countp: Output variable containing the event count.
1175  * @next:   Optional output variable pointing to the next result in @resb.
1176  */
1177
1178
1179
1180 static int get_count_from_result(struct perf_event *event,
1181 struct hv_24x7_data_result_buffer *resb,
1182 struct hv_24x7_result *res, u64 *countp,
1183 struct hv_24x7_result **next)
1184 {
1185 u16 num_elements = be16_to_cpu(res->num_elements_returned);
1186 u16 data_size = be16_to_cpu(res->result_element_data_size);
1187 unsigned int data_offset;
1188 void *element_data;
1189 int i;
1190 u64 count;
1191
1192 /*
1193  * We can bail out early if the result is empty.
1194  */
1195 if (!num_elements) {
1196 pr_debug("Result of request %hhu is empty, nothing to do\n",
1197 res->result_ix);
1198
1199 if (next)
1200 *next = (struct hv_24x7_result *) res->elements;
1201
1202 return -ENODATA;
1203 }
1204
1205 /*
1206  * Since we always specify 1 as the maximum for the smallest resource
1207  * we're requesting, there should be only one element per result.
1208  * Except when an event needs aggregation, in which case there are more.
1209  */
1210 if (num_elements != 1 &&
1211 !domain_needs_aggregation(event_get_domain(event))) {
1212 pr_err("Error: result of request %hhu has %hu elements\n",
1213 res->result_ix, num_elements);
1214
1215 return -EIO;
1216 }
1217
1218 if (data_size != sizeof(u64)) {
1219 pr_debug("Error: result of request %hhu has data of %hu bytes\n",
1220 res->result_ix, data_size);
1221
1222 return -ENOTSUPP;
1223 }
1224
1225 if (resb->interface_version == 1)
1226 data_offset = offsetof(struct hv_24x7_result_element_v1,
1227 element_data);
1228 else
1229 data_offset = offsetof(struct hv_24x7_result_element_v2,
1230 element_data);
1231
1232 /* Sum the counts from all returned result elements */
1233 for (i = count = 0, element_data = res->elements + data_offset;
1234 i < num_elements;
1235 i++, element_data += data_size + data_offset)
1236 count += be64_to_cpu(*((u64 *) element_data));
1237
1238 *countp = count;
1239
1240 /* The next result starts right after the last result element */
1241 if (next)
1242 *next = element_data - data_offset;
1243
1244 return 0;
1245 }
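/*
 * In the common (non-aggregated) case a result carries exactly one
 * element, so with data_size == 8 the loop above runs once and *countp is
 * just that element's 64-bit big-endian value converted to host order.
 * Aggregating domains return several elements, whose values are summed.
 */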
1246
1247 static int single_24x7_request(struct perf_event *event, u64 *count)
1248 {
1249 int ret;
1250 struct hv_24x7_request_buffer *request_buffer;
1251 struct hv_24x7_data_result_buffer *result_buffer;
1252
1253 BUILD_BUG_ON(sizeof(*request_buffer) > 4096);
1254 BUILD_BUG_ON(sizeof(*result_buffer) > 4096);
1255
1256 request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
1257 result_buffer = (void *)get_cpu_var(hv_24x7_resb);
1258
1259 init_24x7_request(request_buffer, result_buffer);
1260
1261 ret = add_event_to_24x7_request(event, request_buffer);
1262 if (ret)
1263 goto out;
1264
1265 ret = make_24x7_request(request_buffer, result_buffer);
1266 if (ret)
1267 goto out;
1268
1269
1270 ret = get_count_from_result(event, result_buffer,
1271 result_buffer->results, count, NULL);
1272
1273 out:
1274 put_cpu_var(hv_24x7_reqb);
1275 put_cpu_var(hv_24x7_resb);
1276 return ret;
1277 }
1278
1279
1280 static int h_24x7_event_init(struct perf_event *event)
1281 {
1282 struct hv_perf_caps caps;
1283 unsigned domain;
1284 unsigned long hret;
1285 u64 ct;
1286
1287 /* Not our event */
1288 if (event->attr.type != event->pmu->type)
1289 return -ENOENT;
1290
1291 /* Unused areas must be 0 */
1292 if (event_get_reserved1(event) ||
1293 event_get_reserved2(event) ||
1294 event_get_reserved3(event)) {
1295 pr_devel("reserved set when forbidden 0x%llx(0x%llx) 0x%llx(0x%llx) 0x%llx(0x%llx)\n",
1296 event->attr.config,
1297 event_get_reserved1(event),
1298 event->attr.config1,
1299 event_get_reserved2(event),
1300 event->attr.config2,
1301 event_get_reserved3(event));
1302 return -EINVAL;
1303 }
1304
1305 /* no branch sampling */
1306 if (has_branch_stack(event))
1307 return -EOPNOTSUPP;
1308
1309 /* offset must be 8 byte aligned */
1310 if (event_get_offset(event) % 8) {
1311 pr_devel("bad alignment\n");
1312 return -EINVAL;
1313 }
1314
1315 domain = event_get_domain(event);
1316 if (domain >= HV_PERF_DOMAIN_MAX) {
1317 pr_devel("invalid domain %d\n", domain);
1318 return -EINVAL;
1319 }
1320
1321 hret = hv_perf_caps_get(&caps);
1322 if (hret) {
1323 pr_devel("could not get capabilities: rc=%ld\n", hret);
1324 return -EIO;
1325 }
1326
1327 /* Physical domains and other lpars require extra capabilities */
1328 if (!caps.collect_privileged && (is_physical_domain(domain) ||
1329 (event_get_lpar(event) != event_get_lpar_max()))) {
1330 pr_devel("hv permissions disallow: is_physical_domain:%d, lpar=0x%llx\n",
1331 is_physical_domain(domain),
1332 event_get_lpar(event));
1333 return -EACCES;
1334 }
1335
1336 /* Do a test hcall for this event and record the initial count */
1337 if (single_24x7_request(event, &ct)) {
1338 pr_devel("test hcall failed\n");
1339 return -EIO;
1340 }
1341 (void)local64_xchg(&event->hw.prev_count, ct);
1342
1343 return 0;
1344 }
1345
1346 static u64 h_24x7_get_value(struct perf_event *event)
1347 {
1348 u64 ct;
1349
1350 if (single_24x7_request(event, &ct))
1351 /* We checked this in event init, it shouldn't fail here */
1352 return 0;
1353
1354 return ct;
1355 }
1356
1357 static void update_event_count(struct perf_event *event, u64 now)
1358 {
1359 s64 prev;
1360
1361 prev = local64_xchg(&event->hw.prev_count, now);
1362 local64_add(now - prev, &event->count);
1363 }
1364
1365 static void h_24x7_event_read(struct perf_event *event)
1366 {
1367 u64 now;
1368 struct hv_24x7_request_buffer *request_buffer;
1369 struct hv_24x7_hw *h24x7hw;
1370 int txn_flags;
1371
1372 txn_flags = __this_cpu_read(hv_24x7_txn_flags);
1373
1374 /*
1375  * If in a READ transaction, this counter is just added to the list of
1376  * counters to read during the next hcall (i.e. commit_txn()).
1377  *
1378  * If not in a READ transaction, go ahead and make the hcall to read
1379  * this counter by itself.
1380  */
1381 if (txn_flags & PERF_PMU_TXN_READ) {
1382 int i;
1383 int ret;
1384
1385 if (__this_cpu_read(hv_24x7_txn_err))
1386 return;
1387
1388 request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
1389
1390 ret = add_event_to_24x7_request(event, request_buffer);
1391 if (ret) {
1392 __this_cpu_write(hv_24x7_txn_err, ret);
1393 } else {
1394 /*
1395  * Associate the event with its request index so that commit_txn()
1396  * can update its count from the matching result.
1397  */
1398 i = request_buffer->num_requests - 1;
1399
1400 h24x7hw = &get_cpu_var(hv_24x7_hw);
1401 h24x7hw->events[i] = event;
1402 put_cpu_var(h24x7hw);
1403
1404 /*
1405  * Clear the event count so we can compute the _change_ in the 24x7
1406  * raw counter value at the end of the txn.
1407  *
1408  * Note that we could alternatively read the 24x7 value now and save
1409  * it in event->hw.prev_count, but that would require issuing a hcall
1410  * here, defeating the point of batching the reads in a transaction.
1411  */
1412 local64_set(&event->count, 0);
1413 }
1414
1415 put_cpu_var(hv_24x7_reqb);
1416 } else {
1417 now = h_24x7_get_value(event);
1418 update_event_count(event, now);
1419 }
1420 }
1421
1422 static void h_24x7_event_start(struct perf_event *event, int flags)
1423 {
1424 if (flags & PERF_EF_RELOAD)
1425 local64_set(&event->hw.prev_count, h_24x7_get_value(event));
1426 }
1427
1428 static void h_24x7_event_stop(struct perf_event *event, int flags)
1429 {
1430 h_24x7_event_read(event);
1431 }
1432
1433 static int h_24x7_event_add(struct perf_event *event, int flags)
1434 {
1435 if (flags & PERF_EF_START)
1436 h_24x7_event_start(event, flags);
1437
1438 return 0;
1439 }
1440
1441 /*
1442  * 24x7 counters only support READ transactions. They are always
1443  * counting and don't need/support ADD transactions.
1444  * Cache the flags, but otherwise ignore transactions that are not
1445  * PERF_PMU_TXN_READ.
1446  */
1447 static void h_24x7_event_start_txn(struct pmu *pmu, unsigned int flags)
1448 {
1449 struct hv_24x7_request_buffer *request_buffer;
1450 struct hv_24x7_data_result_buffer *result_buffer;
1451
1452 /* We should not be called if we are already in a txn */
1453 WARN_ON_ONCE(__this_cpu_read(hv_24x7_txn_flags));
1454
1455 __this_cpu_write(hv_24x7_txn_flags, flags);
1456 if (flags & ~PERF_PMU_TXN_READ)
1457 return;
1458
1459 request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
1460 result_buffer = (void *)get_cpu_var(hv_24x7_resb);
1461
1462 init_24x7_request(request_buffer, result_buffer);
1463
1464 put_cpu_var(hv_24x7_resb);
1465 put_cpu_var(hv_24x7_reqb);
1466 }
1467
1468 /*
1469  * Clean up transaction state.
1470  *
1471  * NOTE: The request and result buffers are intentionally left as-is;
1472  * they are (re)initialized at the start of the next read/txn.
1473  */
1474 static void reset_txn(void)
1475 {
1476 __this_cpu_write(hv_24x7_txn_flags, 0);
1477 __this_cpu_write(hv_24x7_txn_err, 0);
1478 }
1479
1480 /*
1481  * Commit the transaction: make the H_GET_24X7_DATA hcall for all the
1482  * events added since start_txn and update each event's count from the
1483  * corresponding result.
1484  *
1485  * If an earlier add_event_to_24x7_request() failed, the cached error
1486  * is returned and no hcall is made.
1487  */
1488
1489 static int h_24x7_event_commit_txn(struct pmu *pmu)
1490 {
1491 struct hv_24x7_request_buffer *request_buffer;
1492 struct hv_24x7_data_result_buffer *result_buffer;
1493 struct hv_24x7_result *res, *next_res;
1494 u64 count;
1495 int i, ret, txn_flags;
1496 struct hv_24x7_hw *h24x7hw;
1497
1498 txn_flags = __this_cpu_read(hv_24x7_txn_flags);
1499 WARN_ON_ONCE(!txn_flags);
1500
1501 ret = 0;
1502 if (txn_flags & ~PERF_PMU_TXN_READ)
1503 goto out;
1504
1505 ret = __this_cpu_read(hv_24x7_txn_err);
1506 if (ret)
1507 goto out;
1508
1509 request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
1510 result_buffer = (void *)get_cpu_var(hv_24x7_resb);
1511
1512 ret = make_24x7_request(request_buffer, result_buffer);
1513 if (ret)
1514 goto put_reqb;
1515
1516 h24x7hw = &get_cpu_var(hv_24x7_hw);
1517
1518 /* Go through the results and update the counts of the added events */
1519 for (i = 0, res = result_buffer->results;
1520 i < result_buffer->num_results; i++, res = next_res) {
1521 struct perf_event *event = h24x7hw->events[res->result_ix];
1522
1523 ret = get_count_from_result(event, result_buffer, res, &count,
1524 &next_res);
1525 if (ret)
1526 break;
1527
1528 update_event_count(event, count);
1529 }
1530
1531 put_cpu_var(hv_24x7_hw);
1532
1533 put_reqb:
1534 put_cpu_var(hv_24x7_resb);
1535 put_cpu_var(hv_24x7_reqb);
1536 out:
1537 reset_txn();
1538 return ret;
1539 }
1540
1541 /*
1542  * 24x7 counters only support READ transactions. They are always
1543  * counting and don't need/support ADD transactions. Regardless of the
1544  * type of transaction, all we need to do here is clean up, so we don't
1545  * have to check the transaction type.
1546  */
1547 static void h_24x7_event_cancel_txn(struct pmu *pmu)
1548 {
1549 WARN_ON_ONCE(!__this_cpu_read(hv_24x7_txn_flags));
1550 reset_txn();
1551 }
1552
1553 static struct pmu h_24x7_pmu = {
1554 .task_ctx_nr = perf_invalid_context,
1555
1556 .name = "hv_24x7",
1557 .attr_groups = attr_groups,
1558 .event_init = h_24x7_event_init,
1559 .add = h_24x7_event_add,
1560 .del = h_24x7_event_stop,
1561 .start = h_24x7_event_start,
1562 .stop = h_24x7_event_stop,
1563 .read = h_24x7_event_read,
1564 .start_txn = h_24x7_event_start_txn,
1565 .commit_txn = h_24x7_event_commit_txn,
1566 .cancel_txn = h_24x7_event_cancel_txn,
1567 .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
1568 };
1569
1570 static int hv_24x7_init(void)
1571 {
1572 int r;
1573 unsigned long hret;
1574 struct hv_perf_caps caps;
1575
1576 if (!firmware_has_feature(FW_FEATURE_LPAR)) {
1577 pr_debug("not a virtualized system, not enabling\n");
1578 return -ENODEV;
1579 } else if (!cur_cpu_spec->oprofile_cpu_type)
1580 return -ENODEV;
1581
1582 /* POWER8 only supports v1; everything else is treated as v2 */
1583 if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8"))
1584 interface_version = 1;
1585 else {
1586 interface_version = 2;
1587
1588 /* SMT8 on POWER9 needs result elements to be aggregated */
1589 if (threads_per_core == 8)
1590 aggregate_result_elements = true;
1591 }
1592
1593 hret = hv_perf_caps_get(&caps);
1594 if (hret) {
1595 pr_debug("could not obtain capabilities, not enabling, rc=%ld\n",
1596 hret);
1597 return -ENODEV;
1598 }
1599
1600 hv_page_cache = kmem_cache_create("hv-page-4096", 4096, 4096, 0, NULL);
1601 if (!hv_page_cache)
1602 return -ENOMEM;
1603
1604 /* sampling not supported */
1605 h_24x7_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
1606
1607 r = create_events_from_catalog(&event_group.attrs,
1608 &event_desc_group.attrs,
1609 &event_long_desc_group.attrs);
1610
1611 if (r)
1612 return r;
1613
1614 r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1);
1615 if (r)
1616 return r;
1617
1618 return 0;
1619 }
1620
1621 device_initcall(hv_24x7_init);