This source file includes the following definitions.
- get_prefetch_disable_bits
- pseudo_lock_minor_get
- pseudo_lock_minor_release
- region_find_by_minor
- pseudo_lock_cstates_relax
- pseudo_lock_cstates_constrain
- pseudo_lock_region_clear
- pseudo_lock_region_init
- pseudo_lock_init
- pseudo_lock_region_alloc
- pseudo_lock_free
- pseudo_lock_fn
- rdtgroup_monitor_in_progress
- rdtgroup_locksetup_user_restrict
- rdtgroup_locksetup_user_restore
- rdtgroup_locksetup_enter
- rdtgroup_locksetup_exit
- rdtgroup_cbm_overlaps_pseudo_locked
- rdtgroup_pseudo_locked_in_hierarchy
- measure_cycles_lat_fn
- measure_residency_fn
- measure_l2_residency
- measure_l3_residency
- pseudo_lock_measure_cycles
- pseudo_lock_measure_trigger
- rdtgroup_pseudo_lock_create
- rdtgroup_pseudo_lock_remove
- pseudo_lock_dev_open
- pseudo_lock_dev_release
- pseudo_lock_dev_mremap
- pseudo_lock_dev_mmap
- pseudo_lock_devnode
- rdt_pseudo_lock_init
- rdt_pseudo_lock_release
1
2
3
4
5
6
7
8
9
10
11
12 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
14 #include <linux/cacheinfo.h>
15 #include <linux/cpu.h>
16 #include <linux/cpumask.h>
17 #include <linux/debugfs.h>
18 #include <linux/kthread.h>
19 #include <linux/mman.h>
20 #include <linux/perf_event.h>
21 #include <linux/pm_qos.h>
22 #include <linux/slab.h>
23 #include <linux/uaccess.h>
24
25 #include <asm/cacheflush.h>
26 #include <asm/intel-family.h>
27 #include <asm/resctrl_sched.h>
28 #include <asm/perf_event.h>
29
30 #include "../../events/perf_event.h"
31 #include "internal.h"
32
33 #define CREATE_TRACE_POINTS
34 #include "pseudo_lock_event.h"
35
36
37
38
39
40 static u64 prefetch_disable_bits;
41
42
43
44
45
46 static unsigned int pseudo_lock_major;
47 static unsigned long pseudo_lock_minor_avail = GENMASK(MINORBITS, 0);
48 static struct class *pseudo_lock_class;
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70 static u64 get_prefetch_disable_bits(void)
71 {
72 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
73 boot_cpu_data.x86 != 6)
74 return 0;
75
76 switch (boot_cpu_data.x86_model) {
77 case INTEL_FAM6_BROADWELL_X:
78
79
80
81
82
83
84
85
86
87 return 0xF;
88 case INTEL_FAM6_ATOM_GOLDMONT:
89 case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
90
91
92
93
94
95
96
97
98 return 0x5;
99 }
100
101 return 0;
102 }
103
104
105
106
107
108
109
110
111
112
113 static int pseudo_lock_minor_get(unsigned int *minor)
114 {
115 unsigned long first_bit;
116
117 first_bit = find_first_bit(&pseudo_lock_minor_avail, MINORBITS);
118
119 if (first_bit == MINORBITS)
120 return -ENOSPC;
121
122 __clear_bit(first_bit, &pseudo_lock_minor_avail);
123 *minor = first_bit;
124
125 return 0;
126 }
127
128
129
130
131
132 static void pseudo_lock_minor_release(unsigned int minor)
133 {
134 __set_bit(minor, &pseudo_lock_minor_avail);
135 }
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151 static struct rdtgroup *region_find_by_minor(unsigned int minor)
152 {
153 struct rdtgroup *rdtgrp, *rdtgrp_match = NULL;
154
155 list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
156 if (rdtgrp->plr && rdtgrp->plr->minor == minor) {
157 rdtgrp_match = rdtgrp;
158 break;
159 }
160 }
161 return rdtgrp_match;
162 }
163
164
165
166
167
168
169 struct pseudo_lock_pm_req {
170 struct list_head list;
171 struct dev_pm_qos_request req;
172 };
173
174 static void pseudo_lock_cstates_relax(struct pseudo_lock_region *plr)
175 {
176 struct pseudo_lock_pm_req *pm_req, *next;
177
178 list_for_each_entry_safe(pm_req, next, &plr->pm_reqs, list) {
179 dev_pm_qos_remove_request(&pm_req->req);
180 list_del(&pm_req->list);
181 kfree(pm_req);
182 }
183 }
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200 static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr)
201 {
202 struct pseudo_lock_pm_req *pm_req;
203 int cpu;
204 int ret;
205
206 for_each_cpu(cpu, &plr->d->cpu_mask) {
207 pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL);
208 if (!pm_req) {
209 rdt_last_cmd_puts("Failure to allocate memory for PM QoS\n");
210 ret = -ENOMEM;
211 goto out_err;
212 }
213 ret = dev_pm_qos_add_request(get_cpu_device(cpu),
214 &pm_req->req,
215 DEV_PM_QOS_RESUME_LATENCY,
216 30);
217 if (ret < 0) {
218 rdt_last_cmd_printf("Failed to add latency req CPU%d\n",
219 cpu);
220 kfree(pm_req);
221 ret = -1;
222 goto out_err;
223 }
224 list_add(&pm_req->list, &plr->pm_reqs);
225 }
226
227 return 0;
228
229 out_err:
230 pseudo_lock_cstates_relax(plr);
231 return ret;
232 }
233
234
235
236
237
238
239
240
241
242
243 static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
244 {
245 plr->size = 0;
246 plr->line_size = 0;
247 kfree(plr->kmem);
248 plr->kmem = NULL;
249 plr->r = NULL;
250 if (plr->d)
251 plr->d->plr = NULL;
252 plr->d = NULL;
253 plr->cbm = 0;
254 plr->debugfs_dir = NULL;
255 }
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275 static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
276 {
277 struct cpu_cacheinfo *ci;
278 int ret;
279 int i;
280
281
282 plr->cpu = cpumask_first(&plr->d->cpu_mask);
283
284 if (!cpu_online(plr->cpu)) {
285 rdt_last_cmd_printf("CPU %u associated with cache not online\n",
286 plr->cpu);
287 ret = -ENODEV;
288 goto out_region;
289 }
290
291 ci = get_cpu_cacheinfo(plr->cpu);
292
293 plr->size = rdtgroup_cbm_to_size(plr->r, plr->d, plr->cbm);
294
295 for (i = 0; i < ci->num_leaves; i++) {
296 if (ci->info_list[i].level == plr->r->cache_level) {
297 plr->line_size = ci->info_list[i].coherency_line_size;
298 return 0;
299 }
300 }
301
302 ret = -1;
303 rdt_last_cmd_puts("Unable to determine cache line size\n");
304 out_region:
305 pseudo_lock_region_clear(plr);
306 return ret;
307 }
308
309
310
311
312
313
314
315
316
317
318
319
320 static int pseudo_lock_init(struct rdtgroup *rdtgrp)
321 {
322 struct pseudo_lock_region *plr;
323
324 plr = kzalloc(sizeof(*plr), GFP_KERNEL);
325 if (!plr)
326 return -ENOMEM;
327
328 init_waitqueue_head(&plr->lock_thread_wq);
329 INIT_LIST_HEAD(&plr->pm_reqs);
330 rdtgrp->plr = plr;
331 return 0;
332 }
333
334
335
336
337
338
339
340
341
342
343
344 static int pseudo_lock_region_alloc(struct pseudo_lock_region *plr)
345 {
346 int ret;
347
348 ret = pseudo_lock_region_init(plr);
349 if (ret < 0)
350 return ret;
351
352
353
354
355
356 if (plr->size > KMALLOC_MAX_SIZE) {
357 rdt_last_cmd_puts("Requested region exceeds maximum size\n");
358 ret = -E2BIG;
359 goto out_region;
360 }
361
362 plr->kmem = kzalloc(plr->size, GFP_KERNEL);
363 if (!plr->kmem) {
364 rdt_last_cmd_puts("Unable to allocate memory\n");
365 ret = -ENOMEM;
366 goto out_region;
367 }
368
369 ret = 0;
370 goto out;
371 out_region:
372 pseudo_lock_region_clear(plr);
373 out:
374 return ret;
375 }
376
377
378
379
380
381
382
383
384
385
386
387 static void pseudo_lock_free(struct rdtgroup *rdtgrp)
388 {
389 pseudo_lock_region_clear(rdtgrp->plr);
390 kfree(rdtgrp->plr);
391 rdtgrp->plr = NULL;
392 }
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413 static int pseudo_lock_fn(void *_rdtgrp)
414 {
415 struct rdtgroup *rdtgrp = _rdtgrp;
416 struct pseudo_lock_region *plr = rdtgrp->plr;
417 u32 rmid_p, closid_p;
418 unsigned long i;
419 #ifdef CONFIG_KASAN
420
421
422
423
424
425
426
427
428 unsigned int line_size;
429 unsigned int size;
430 void *mem_r;
431 #else
432 register unsigned int line_size asm("esi");
433 register unsigned int size asm("edi");
434 register void *mem_r asm(_ASM_BX);
435 #endif
436
437
438
439
440
441
442
443
444
445 native_wbinvd();
446
447
448
449
450
451 local_irq_disable();
452
453
454
455
456
457
458
459
460
461
462 __wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
463 closid_p = this_cpu_read(pqr_state.cur_closid);
464 rmid_p = this_cpu_read(pqr_state.cur_rmid);
465 mem_r = plr->kmem;
466 size = plr->size;
467 line_size = plr->line_size;
468
469
470
471
472
473
474 __wrmsr(IA32_PQR_ASSOC, rmid_p, rdtgrp->closid);
475
476
477
478
479
480
481
482
483
484 for (i = 0; i < size; i += PAGE_SIZE) {
485
486
487
488
489 rmb();
490 asm volatile("mov (%0,%1,1), %%eax\n\t"
491 :
492 : "r" (mem_r), "r" (i)
493 : "%eax", "memory");
494 }
495 for (i = 0; i < size; i += line_size) {
496
497
498
499
500 rmb();
501 asm volatile("mov (%0,%1,1), %%eax\n\t"
502 :
503 : "r" (mem_r), "r" (i)
504 : "%eax", "memory");
505 }
506
507
508
509
510 __wrmsr(IA32_PQR_ASSOC, rmid_p, closid_p);
511
512
513 wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
514 local_irq_enable();
515
516 plr->thread_done = 1;
517 wake_up_interruptible(&plr->lock_thread_wq);
518 return 0;
519 }
520
521
522
523
524
525
526
527
528 static int rdtgroup_monitor_in_progress(struct rdtgroup *rdtgrp)
529 {
530 return !list_empty(&rdtgrp->mon.crdtgrp_list);
531 }
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548 static int rdtgroup_locksetup_user_restrict(struct rdtgroup *rdtgrp)
549 {
550 int ret;
551
552 ret = rdtgroup_kn_mode_restrict(rdtgrp, "tasks");
553 if (ret)
554 return ret;
555
556 ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus");
557 if (ret)
558 goto err_tasks;
559
560 ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list");
561 if (ret)
562 goto err_cpus;
563
564 if (rdt_mon_capable) {
565 ret = rdtgroup_kn_mode_restrict(rdtgrp, "mon_groups");
566 if (ret)
567 goto err_cpus_list;
568 }
569
570 ret = 0;
571 goto out;
572
573 err_cpus_list:
574 rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777);
575 err_cpus:
576 rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777);
577 err_tasks:
578 rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777);
579 out:
580 return ret;
581 }
582
583
584
585
586
587
588
589
590
591
592
593
594
595 static int rdtgroup_locksetup_user_restore(struct rdtgroup *rdtgrp)
596 {
597 int ret;
598
599 ret = rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777);
600 if (ret)
601 return ret;
602
603 ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777);
604 if (ret)
605 goto err_tasks;
606
607 ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777);
608 if (ret)
609 goto err_cpus;
610
611 if (rdt_mon_capable) {
612 ret = rdtgroup_kn_mode_restore(rdtgrp, "mon_groups", 0777);
613 if (ret)
614 goto err_cpus_list;
615 }
616
617 ret = 0;
618 goto out;
619
620 err_cpus_list:
621 rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list");
622 err_cpus:
623 rdtgroup_kn_mode_restrict(rdtgrp, "cpus");
624 err_tasks:
625 rdtgroup_kn_mode_restrict(rdtgrp, "tasks");
626 out:
627 return ret;
628 }
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648 int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
649 {
650 int ret;
651
652
653
654
655
656 if (rdtgrp == &rdtgroup_default) {
657 rdt_last_cmd_puts("Cannot pseudo-lock default group\n");
658 return -EINVAL;
659 }
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687 if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled ||
688 rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled) {
689 rdt_last_cmd_puts("CDP enabled\n");
690 return -EINVAL;
691 }
692
693
694
695
696
697 prefetch_disable_bits = get_prefetch_disable_bits();
698 if (prefetch_disable_bits == 0) {
699 rdt_last_cmd_puts("Pseudo-locking not supported\n");
700 return -EINVAL;
701 }
702
703 if (rdtgroup_monitor_in_progress(rdtgrp)) {
704 rdt_last_cmd_puts("Monitoring in progress\n");
705 return -EINVAL;
706 }
707
708 if (rdtgroup_tasks_assigned(rdtgrp)) {
709 rdt_last_cmd_puts("Tasks assigned to resource group\n");
710 return -EINVAL;
711 }
712
713 if (!cpumask_empty(&rdtgrp->cpu_mask)) {
714 rdt_last_cmd_puts("CPUs assigned to resource group\n");
715 return -EINVAL;
716 }
717
718 if (rdtgroup_locksetup_user_restrict(rdtgrp)) {
719 rdt_last_cmd_puts("Unable to modify resctrl permissions\n");
720 return -EIO;
721 }
722
723 ret = pseudo_lock_init(rdtgrp);
724 if (ret) {
725 rdt_last_cmd_puts("Unable to init pseudo-lock region\n");
726 goto out_release;
727 }
728
729
730
731
732
733
734
735 free_rmid(rdtgrp->mon.rmid);
736
737 ret = 0;
738 goto out;
739
740 out_release:
741 rdtgroup_locksetup_user_restore(rdtgrp);
742 out:
743 return ret;
744 }
745
746
747
748
749
750
751
752
753
754
755 int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
756 {
757 int ret;
758
759 if (rdt_mon_capable) {
760 ret = alloc_rmid();
761 if (ret < 0) {
762 rdt_last_cmd_puts("Out of RMIDs\n");
763 return ret;
764 }
765 rdtgrp->mon.rmid = ret;
766 }
767
768 ret = rdtgroup_locksetup_user_restore(rdtgrp);
769 if (ret) {
770 free_rmid(rdtgrp->mon.rmid);
771 return ret;
772 }
773
774 pseudo_lock_free(rdtgrp);
775 return 0;
776 }
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793 bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm)
794 {
795 unsigned int cbm_len;
796 unsigned long cbm_b;
797
798 if (d->plr) {
799 cbm_len = d->plr->r->cache.cbm_len;
800 cbm_b = d->plr->cbm;
801 if (bitmap_intersects(&cbm, &cbm_b, cbm_len))
802 return true;
803 }
804 return false;
805 }
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820 bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
821 {
822 cpumask_var_t cpu_with_psl;
823 struct rdt_resource *r;
824 struct rdt_domain *d_i;
825 bool ret = false;
826
827 if (!zalloc_cpumask_var(&cpu_with_psl, GFP_KERNEL))
828 return true;
829
830
831
832
833
834 for_each_alloc_enabled_rdt_resource(r) {
835 list_for_each_entry(d_i, &r->domains, list) {
836 if (d_i->plr)
837 cpumask_or(cpu_with_psl, cpu_with_psl,
838 &d_i->cpu_mask);
839 }
840 }
841
842
843
844
845
846 if (cpumask_intersects(&d->cpu_mask, cpu_with_psl))
847 ret = true;
848
849 free_cpumask_var(cpu_with_psl);
850 return ret;
851 }
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867 static int measure_cycles_lat_fn(void *_plr)
868 {
869 struct pseudo_lock_region *plr = _plr;
870 unsigned long i;
871 u64 start, end;
872 void *mem_r;
873
874 local_irq_disable();
875
876
877
878 wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
879 mem_r = READ_ONCE(plr->kmem);
880
881
882
883
884 start = rdtsc_ordered();
885 for (i = 0; i < plr->size; i += 32) {
886 start = rdtsc_ordered();
887 asm volatile("mov (%0,%1,1), %%eax\n\t"
888 :
889 : "r" (mem_r), "r" (i)
890 : "%eax", "memory");
891 end = rdtsc_ordered();
892 trace_pseudo_lock_mem_latency((u32)(end - start));
893 }
894 wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
895 local_irq_enable();
896 plr->thread_done = 1;
897 wake_up_interruptible(&plr->lock_thread_wq);
898 return 0;
899 }
900
901
902
903
904
905
906
907
908
909
910 static struct perf_event_attr perf_miss_attr = {
911 .type = PERF_TYPE_RAW,
912 .size = sizeof(struct perf_event_attr),
913 .pinned = 1,
914 .disabled = 0,
915 .exclude_user = 1,
916 };
917
918 static struct perf_event_attr perf_hit_attr = {
919 .type = PERF_TYPE_RAW,
920 .size = sizeof(struct perf_event_attr),
921 .pinned = 1,
922 .disabled = 0,
923 .exclude_user = 1,
924 };
925
926 struct residency_counts {
927 u64 miss_before, hits_before;
928 u64 miss_after, hits_after;
929 };
930
931 static int measure_residency_fn(struct perf_event_attr *miss_attr,
932 struct perf_event_attr *hit_attr,
933 struct pseudo_lock_region *plr,
934 struct residency_counts *counts)
935 {
936 u64 hits_before = 0, hits_after = 0, miss_before = 0, miss_after = 0;
937 struct perf_event *miss_event, *hit_event;
938 int hit_pmcnum, miss_pmcnum;
939 unsigned int line_size;
940 unsigned int size;
941 unsigned long i;
942 void *mem_r;
943 u64 tmp;
944
945 miss_event = perf_event_create_kernel_counter(miss_attr, plr->cpu,
946 NULL, NULL, NULL);
947 if (IS_ERR(miss_event))
948 goto out;
949
950 hit_event = perf_event_create_kernel_counter(hit_attr, plr->cpu,
951 NULL, NULL, NULL);
952 if (IS_ERR(hit_event))
953 goto out_miss;
954
955 local_irq_disable();
956
957
958
959
960 if (perf_event_read_local(miss_event, &tmp, NULL, NULL)) {
961 local_irq_enable();
962 goto out_hit;
963 }
964 if (perf_event_read_local(hit_event, &tmp, NULL, NULL)) {
965 local_irq_enable();
966 goto out_hit;
967 }
968
969
970
971
972 wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
973
974
975
976
977
978
979 miss_pmcnum = x86_perf_rdpmc_index(miss_event);
980 hit_pmcnum = x86_perf_rdpmc_index(hit_event);
981 line_size = READ_ONCE(plr->line_size);
982 mem_r = READ_ONCE(plr->kmem);
983 size = READ_ONCE(plr->size);
984
985
986
987
988
989
990 rdpmcl(hit_pmcnum, hits_before);
991 rdpmcl(miss_pmcnum, miss_before);
992
993
994
995
996
997
998 rmb();
999 rdpmcl(hit_pmcnum, hits_before);
1000 rdpmcl(miss_pmcnum, miss_before);
1001
1002
1003
1004
1005 rmb();
1006 for (i = 0; i < size; i += line_size) {
1007
1008
1009
1010
1011 rmb();
1012 asm volatile("mov (%0,%1,1), %%eax\n\t"
1013 :
1014 : "r" (mem_r), "r" (i)
1015 : "%eax", "memory");
1016 }
1017
1018
1019
1020
1021 rmb();
1022 rdpmcl(hit_pmcnum, hits_after);
1023 rdpmcl(miss_pmcnum, miss_after);
1024
1025
1026
1027
1028 rmb();
1029
1030 wrmsr(MSR_MISC_FEATURE_CONTROL, 0x0, 0x0);
1031 local_irq_enable();
1032 out_hit:
1033 perf_event_release_kernel(hit_event);
1034 out_miss:
1035 perf_event_release_kernel(miss_event);
1036 out:
1037
1038
1039
1040 counts->miss_before = miss_before;
1041 counts->hits_before = hits_before;
1042 counts->miss_after = miss_after;
1043 counts->hits_after = hits_after;
1044 return 0;
1045 }
1046
1047 static int measure_l2_residency(void *_plr)
1048 {
1049 struct pseudo_lock_region *plr = _plr;
1050 struct residency_counts counts = {0};
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060 switch (boot_cpu_data.x86_model) {
1061 case INTEL_FAM6_ATOM_GOLDMONT:
1062 case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
1063 perf_miss_attr.config = X86_CONFIG(.event = 0xd1,
1064 .umask = 0x10);
1065 perf_hit_attr.config = X86_CONFIG(.event = 0xd1,
1066 .umask = 0x2);
1067 break;
1068 default:
1069 goto out;
1070 }
1071
1072 measure_residency_fn(&perf_miss_attr, &perf_hit_attr, plr, &counts);
1073
1074
1075
1076
1077 trace_pseudo_lock_l2(counts.hits_after - counts.hits_before,
1078 counts.miss_after - counts.miss_before);
1079 out:
1080 plr->thread_done = 1;
1081 wake_up_interruptible(&plr->lock_thread_wq);
1082 return 0;
1083 }
1084
1085 static int measure_l3_residency(void *_plr)
1086 {
1087 struct pseudo_lock_region *plr = _plr;
1088 struct residency_counts counts = {0};
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099 switch (boot_cpu_data.x86_model) {
1100 case INTEL_FAM6_BROADWELL_X:
1101
1102 perf_hit_attr.config = X86_CONFIG(.event = 0x2e,
1103 .umask = 0x4f);
1104 perf_miss_attr.config = X86_CONFIG(.event = 0x2e,
1105 .umask = 0x41);
1106 break;
1107 default:
1108 goto out;
1109 }
1110
1111 measure_residency_fn(&perf_miss_attr, &perf_hit_attr, plr, &counts);
1112
1113
1114
1115
1116
1117 counts.miss_after -= counts.miss_before;
1118 if (boot_cpu_data.x86_model == INTEL_FAM6_BROADWELL_X) {
1119
1120
1121
1122
1123
1124
1125
1126
1127 counts.hits_after -= counts.hits_before;
1128
1129 counts.hits_after -= min(counts.miss_after, counts.hits_after);
1130 } else {
1131 counts.hits_after -= counts.hits_before;
1132 }
1133
1134 trace_pseudo_lock_l3(counts.hits_after, counts.miss_after);
1135 out:
1136 plr->thread_done = 1;
1137 wake_up_interruptible(&plr->lock_thread_wq);
1138 return 0;
1139 }
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151 static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
1152 {
1153 struct pseudo_lock_region *plr = rdtgrp->plr;
1154 struct task_struct *thread;
1155 unsigned int cpu;
1156 int ret = -1;
1157
1158 cpus_read_lock();
1159 mutex_lock(&rdtgroup_mutex);
1160
1161 if (rdtgrp->flags & RDT_DELETED) {
1162 ret = -ENODEV;
1163 goto out;
1164 }
1165
1166 if (!plr->d) {
1167 ret = -ENODEV;
1168 goto out;
1169 }
1170
1171 plr->thread_done = 0;
1172 cpu = cpumask_first(&plr->d->cpu_mask);
1173 if (!cpu_online(cpu)) {
1174 ret = -ENODEV;
1175 goto out;
1176 }
1177
1178 plr->cpu = cpu;
1179
1180 if (sel == 1)
1181 thread = kthread_create_on_node(measure_cycles_lat_fn, plr,
1182 cpu_to_node(cpu),
1183 "pseudo_lock_measure/%u",
1184 cpu);
1185 else if (sel == 2)
1186 thread = kthread_create_on_node(measure_l2_residency, plr,
1187 cpu_to_node(cpu),
1188 "pseudo_lock_measure/%u",
1189 cpu);
1190 else if (sel == 3)
1191 thread = kthread_create_on_node(measure_l3_residency, plr,
1192 cpu_to_node(cpu),
1193 "pseudo_lock_measure/%u",
1194 cpu);
1195 else
1196 goto out;
1197
1198 if (IS_ERR(thread)) {
1199 ret = PTR_ERR(thread);
1200 goto out;
1201 }
1202 kthread_bind(thread, cpu);
1203 wake_up_process(thread);
1204
1205 ret = wait_event_interruptible(plr->lock_thread_wq,
1206 plr->thread_done == 1);
1207 if (ret < 0)
1208 goto out;
1209
1210 ret = 0;
1211
1212 out:
1213 mutex_unlock(&rdtgroup_mutex);
1214 cpus_read_unlock();
1215 return ret;
1216 }
1217
1218 static ssize_t pseudo_lock_measure_trigger(struct file *file,
1219 const char __user *user_buf,
1220 size_t count, loff_t *ppos)
1221 {
1222 struct rdtgroup *rdtgrp = file->private_data;
1223 size_t buf_size;
1224 char buf[32];
1225 int ret;
1226 int sel;
1227
1228 buf_size = min(count, (sizeof(buf) - 1));
1229 if (copy_from_user(buf, user_buf, buf_size))
1230 return -EFAULT;
1231
1232 buf[buf_size] = '\0';
1233 ret = kstrtoint(buf, 10, &sel);
1234 if (ret == 0) {
1235 if (sel != 1 && sel != 2 && sel != 3)
1236 return -EINVAL;
1237 ret = debugfs_file_get(file->f_path.dentry);
1238 if (ret)
1239 return ret;
1240 ret = pseudo_lock_measure_cycles(rdtgrp, sel);
1241 if (ret == 0)
1242 ret = count;
1243 debugfs_file_put(file->f_path.dentry);
1244 }
1245
1246 return ret;
1247 }
1248
1249 static const struct file_operations pseudo_measure_fops = {
1250 .write = pseudo_lock_measure_trigger,
1251 .open = simple_open,
1252 .llseek = default_llseek,
1253 };
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271 int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
1272 {
1273 struct pseudo_lock_region *plr = rdtgrp->plr;
1274 struct task_struct *thread;
1275 unsigned int new_minor;
1276 struct device *dev;
1277 int ret;
1278
1279 ret = pseudo_lock_region_alloc(plr);
1280 if (ret < 0)
1281 return ret;
1282
1283 ret = pseudo_lock_cstates_constrain(plr);
1284 if (ret < 0) {
1285 ret = -EINVAL;
1286 goto out_region;
1287 }
1288
1289 plr->thread_done = 0;
1290
1291 thread = kthread_create_on_node(pseudo_lock_fn, rdtgrp,
1292 cpu_to_node(plr->cpu),
1293 "pseudo_lock/%u", plr->cpu);
1294 if (IS_ERR(thread)) {
1295 ret = PTR_ERR(thread);
1296 rdt_last_cmd_printf("Locking thread returned error %d\n", ret);
1297 goto out_cstates;
1298 }
1299
1300 kthread_bind(thread, plr->cpu);
1301 wake_up_process(thread);
1302
1303 ret = wait_event_interruptible(plr->lock_thread_wq,
1304 plr->thread_done == 1);
1305 if (ret < 0) {
1306
1307
1308
1309
1310
1311
1312
1313
1314 rdt_last_cmd_puts("Locking thread interrupted\n");
1315 goto out_cstates;
1316 }
1317
1318 ret = pseudo_lock_minor_get(&new_minor);
1319 if (ret < 0) {
1320 rdt_last_cmd_puts("Unable to obtain a new minor number\n");
1321 goto out_cstates;
1322 }
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333 mutex_unlock(&rdtgroup_mutex);
1334
1335 if (!IS_ERR_OR_NULL(debugfs_resctrl)) {
1336 plr->debugfs_dir = debugfs_create_dir(rdtgrp->kn->name,
1337 debugfs_resctrl);
1338 if (!IS_ERR_OR_NULL(plr->debugfs_dir))
1339 debugfs_create_file("pseudo_lock_measure", 0200,
1340 plr->debugfs_dir, rdtgrp,
1341 &pseudo_measure_fops);
1342 }
1343
1344 dev = device_create(pseudo_lock_class, NULL,
1345 MKDEV(pseudo_lock_major, new_minor),
1346 rdtgrp, "%s", rdtgrp->kn->name);
1347
1348 mutex_lock(&rdtgroup_mutex);
1349
1350 if (IS_ERR(dev)) {
1351 ret = PTR_ERR(dev);
1352 rdt_last_cmd_printf("Failed to create character device: %d\n",
1353 ret);
1354 goto out_debugfs;
1355 }
1356
1357
1358 if (rdtgrp->flags & RDT_DELETED) {
1359 ret = -ENODEV;
1360 goto out_device;
1361 }
1362
1363 plr->minor = new_minor;
1364
1365 rdtgrp->mode = RDT_MODE_PSEUDO_LOCKED;
1366 closid_free(rdtgrp->closid);
1367 rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0444);
1368 rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0444);
1369
1370 ret = 0;
1371 goto out;
1372
1373 out_device:
1374 device_destroy(pseudo_lock_class, MKDEV(pseudo_lock_major, new_minor));
1375 out_debugfs:
1376 debugfs_remove_recursive(plr->debugfs_dir);
1377 pseudo_lock_minor_release(new_minor);
1378 out_cstates:
1379 pseudo_lock_cstates_relax(plr);
1380 out_region:
1381 pseudo_lock_region_clear(plr);
1382 out:
1383 return ret;
1384 }
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400 void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp)
1401 {
1402 struct pseudo_lock_region *plr = rdtgrp->plr;
1403
1404 if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1405
1406
1407
1408
1409 closid_free(rdtgrp->closid);
1410 goto free;
1411 }
1412
1413 pseudo_lock_cstates_relax(plr);
1414 debugfs_remove_recursive(rdtgrp->plr->debugfs_dir);
1415 device_destroy(pseudo_lock_class, MKDEV(pseudo_lock_major, plr->minor));
1416 pseudo_lock_minor_release(plr->minor);
1417
1418 free:
1419 pseudo_lock_free(rdtgrp);
1420 }
1421
1422 static int pseudo_lock_dev_open(struct inode *inode, struct file *filp)
1423 {
1424 struct rdtgroup *rdtgrp;
1425
1426 mutex_lock(&rdtgroup_mutex);
1427
1428 rdtgrp = region_find_by_minor(iminor(inode));
1429 if (!rdtgrp) {
1430 mutex_unlock(&rdtgroup_mutex);
1431 return -ENODEV;
1432 }
1433
1434 filp->private_data = rdtgrp;
1435 atomic_inc(&rdtgrp->waitcount);
1436
1437 filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
1438
1439 mutex_unlock(&rdtgroup_mutex);
1440
1441 return 0;
1442 }
1443
1444 static int pseudo_lock_dev_release(struct inode *inode, struct file *filp)
1445 {
1446 struct rdtgroup *rdtgrp;
1447
1448 mutex_lock(&rdtgroup_mutex);
1449 rdtgrp = filp->private_data;
1450 WARN_ON(!rdtgrp);
1451 if (!rdtgrp) {
1452 mutex_unlock(&rdtgroup_mutex);
1453 return -ENODEV;
1454 }
1455 filp->private_data = NULL;
1456 atomic_dec(&rdtgrp->waitcount);
1457 mutex_unlock(&rdtgroup_mutex);
1458 return 0;
1459 }
1460
1461 static int pseudo_lock_dev_mremap(struct vm_area_struct *area)
1462 {
1463
1464 return -EINVAL;
1465 }
1466
1467 static const struct vm_operations_struct pseudo_mmap_ops = {
1468 .mremap = pseudo_lock_dev_mremap,
1469 };
1470
1471 static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma)
1472 {
1473 unsigned long vsize = vma->vm_end - vma->vm_start;
1474 unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
1475 struct pseudo_lock_region *plr;
1476 struct rdtgroup *rdtgrp;
1477 unsigned long physical;
1478 unsigned long psize;
1479
1480 mutex_lock(&rdtgroup_mutex);
1481
1482 rdtgrp = filp->private_data;
1483 WARN_ON(!rdtgrp);
1484 if (!rdtgrp) {
1485 mutex_unlock(&rdtgroup_mutex);
1486 return -ENODEV;
1487 }
1488
1489 plr = rdtgrp->plr;
1490
1491 if (!plr->d) {
1492 mutex_unlock(&rdtgroup_mutex);
1493 return -ENODEV;
1494 }
1495
1496
1497
1498
1499
1500
1501
1502 if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) {
1503 mutex_unlock(&rdtgroup_mutex);
1504 return -EINVAL;
1505 }
1506
1507 physical = __pa(plr->kmem) >> PAGE_SHIFT;
1508 psize = plr->size - off;
1509
1510 if (off > plr->size) {
1511 mutex_unlock(&rdtgroup_mutex);
1512 return -ENOSPC;
1513 }
1514
1515
1516
1517
1518
1519 if (!(vma->vm_flags & VM_SHARED)) {
1520 mutex_unlock(&rdtgroup_mutex);
1521 return -EINVAL;
1522 }
1523
1524 if (vsize > psize) {
1525 mutex_unlock(&rdtgroup_mutex);
1526 return -ENOSPC;
1527 }
1528
1529 memset(plr->kmem + off, 0, vsize);
1530
1531 if (remap_pfn_range(vma, vma->vm_start, physical + vma->vm_pgoff,
1532 vsize, vma->vm_page_prot)) {
1533 mutex_unlock(&rdtgroup_mutex);
1534 return -EAGAIN;
1535 }
1536 vma->vm_ops = &pseudo_mmap_ops;
1537 mutex_unlock(&rdtgroup_mutex);
1538 return 0;
1539 }
1540
1541 static const struct file_operations pseudo_lock_dev_fops = {
1542 .owner = THIS_MODULE,
1543 .llseek = no_llseek,
1544 .read = NULL,
1545 .write = NULL,
1546 .open = pseudo_lock_dev_open,
1547 .release = pseudo_lock_dev_release,
1548 .mmap = pseudo_lock_dev_mmap,
1549 };
1550
1551 static char *pseudo_lock_devnode(struct device *dev, umode_t *mode)
1552 {
1553 struct rdtgroup *rdtgrp;
1554
1555 rdtgrp = dev_get_drvdata(dev);
1556 if (mode)
1557 *mode = 0600;
1558 return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdtgrp->kn->name);
1559 }
1560
1561 int rdt_pseudo_lock_init(void)
1562 {
1563 int ret;
1564
1565 ret = register_chrdev(0, "pseudo_lock", &pseudo_lock_dev_fops);
1566 if (ret < 0)
1567 return ret;
1568
1569 pseudo_lock_major = ret;
1570
1571 pseudo_lock_class = class_create(THIS_MODULE, "pseudo_lock");
1572 if (IS_ERR(pseudo_lock_class)) {
1573 ret = PTR_ERR(pseudo_lock_class);
1574 unregister_chrdev(pseudo_lock_major, "pseudo_lock");
1575 return ret;
1576 }
1577
1578 pseudo_lock_class->devnode = pseudo_lock_devnode;
1579 return 0;
1580 }
1581
1582 void rdt_pseudo_lock_release(void)
1583 {
1584 class_destroy(pseudo_lock_class);
1585 pseudo_lock_class = NULL;
1586 unregister_chrdev(pseudo_lock_major, "pseudo_lock");
1587 pseudo_lock_major = 0;
1588 }