This source file includes the following definitions:
- kvm_pmi_trigger_fn
- kvm_perf_overflow
- kvm_perf_overflow_intr
- pmc_reprogram_counter
- reprogram_gp_counter
- reprogram_fixed_counter
- reprogram_counter
- kvm_pmu_handle_event
- kvm_pmu_is_valid_msr_idx
- is_vmware_backdoor_pmc
- kvm_pmu_rdpmc_vmware
- kvm_pmu_rdpmc
- kvm_pmu_deliver_pmi
- kvm_pmu_is_valid_msr
- kvm_pmu_get_msr
- kvm_pmu_set_msr
- kvm_pmu_refresh
- kvm_pmu_reset
- kvm_pmu_init
- kvm_pmu_destroy
- kvm_vm_ioctl_set_pmu_event_filter
#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <asm/perf_event.h>
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
#include "pmu.h"

#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300
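/*
 * Each guest counter (struct kvm_pmc) is backed by a host perf_event.
 * Writes to the counter-control MSRs reprogram the backing event, the
 * overflow callbacks below mark the counter in pmu->reprogram_pmi and
 * raise KVM_REQ_PMU, and guest PMIs are delivered through the local
 * APIC's LVTPC entry (see kvm_pmu_deliver_pmi()).
 */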
static void kvm_pmi_trigger_fn(struct irq_work *irq_work)
{
	struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu, irq_work);
	struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);

	kvm_pmu_deliver_pmi(vcpu);
}

static void kvm_perf_overflow(struct perf_event *perf_event,
			      struct perf_sample_data *data,
			      struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);

	if (!test_and_set_bit(pmc->idx,
			      (unsigned long *)&pmu->reprogram_pmi)) {
		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
	}
}

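/*
 * Overflow handler used when the guest requested an interrupt on overflow:
 * in addition to marking the counter for reprogramming, it arranges for a
 * PMI to be delivered to the guest.
 */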
static void kvm_perf_overflow_intr(struct perf_event *perf_event,
				   struct perf_sample_data *data,
				   struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);

	if (!test_and_set_bit(pmc->idx,
			      (unsigned long *)&pmu->reprogram_pmi)) {
		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
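		/*
		 * Deliver the PMI: if the vcpu was in guest mode when the NMI
		 * arrived, request injection on the next entry; otherwise
		 * defer to irq_work, since the vcpu cannot be kicked from
		 * NMI context.
		 */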
		if (!kvm_is_in_guest())
			irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
		else
			kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
	}
}

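/*
 * Create a pinned, host-excluded perf_event to back @pmc.  The sample
 * period is set to the distance to overflow of the guest counter, so the
 * overflow callback fires exactly when the guest counter would wrap.
 */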
static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
				  unsigned config, bool exclude_user,
				  bool exclude_kernel, bool intr,
				  bool in_tx, bool in_tx_cp)
{
	struct perf_event *event;
	struct perf_event_attr attr = {
		.type = type,
		.size = sizeof(attr),
		.pinned = true,
		.exclude_idle = true,
		.exclude_host = 1,
		.exclude_user = exclude_user,
		.exclude_kernel = exclude_kernel,
		.config = config,
	};

	attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);

	if (in_tx)
		attr.config |= HSW_IN_TX;
	if (in_tx_cp) {
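		/*
		 * HSW_IN_TX_CHECKPOINTED is not supported with a nonzero
		 * sample period; clear the period so creating the counter
		 * does not fail.
		 */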
		attr.sample_period = 0;
		attr.config |= HSW_IN_TX_CHECKPOINTED;
	}

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 intr ? kvm_perf_overflow_intr :
						 kvm_perf_overflow, pmc);
	if (IS_ERR(event)) {
		pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
				     PTR_ERR(event), pmc->idx);
		return;
	}

	pmc->perf_event = event;
	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
}

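/*
 * Reprogram a general-purpose counter from its event-select MSR value:
 * stop the current backing event, apply the VM's PMU event filter, and
 * map the event select / unit mask to a generic perf hardware event when
 * possible, falling back to a raw event otherwise.
 */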
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
{
	unsigned config, type = PERF_TYPE_RAW;
	u8 event_select, unit_mask;
	struct kvm *kvm = pmc->vcpu->kvm;
	struct kvm_pmu_event_filter *filter;
	int i;
	bool allow_event = true;

	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
		printk_once("kvm pmu: pin control bit is ignored\n");

	pmc->eventsel = eventsel;

	pmc_stop_counter(pmc);

	if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc))
		return;

	filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
	if (filter) {
		for (i = 0; i < filter->nevents; i++)
			if (filter->events[i] ==
			    (eventsel & AMD64_RAW_EVENT_MASK_NB))
				break;
		if (filter->action == KVM_PMU_EVENT_ALLOW &&
		    i == filter->nevents)
			allow_event = false;
		if (filter->action == KVM_PMU_EVENT_DENY &&
		    i < filter->nevents)
			allow_event = false;
	}
	if (!allow_event)
		return;

	event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
	unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;

	if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
			  ARCH_PERFMON_EVENTSEL_INV |
			  ARCH_PERFMON_EVENTSEL_CMASK |
			  HSW_IN_TX |
			  HSW_IN_TX_CHECKPOINTED))) {
		config = kvm_x86_ops->pmu_ops->find_arch_event(pmc_to_pmu(pmc),
							       event_select,
							       unit_mask);
		if (config != PERF_COUNT_HW_MAX)
			type = PERF_TYPE_HARDWARE;
	}

	if (type == PERF_TYPE_RAW)
		config = eventsel & X86_RAW_EVENT_MASK;

	pmc_reprogram_counter(pmc, type, config,
			      !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
			      !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
			      eventsel & ARCH_PERFMON_EVENTSEL_INT,
			      (eventsel & HSW_IN_TX),
			      (eventsel & HSW_IN_TX_CHECKPOINTED));
}
EXPORT_SYMBOL_GPL(reprogram_gp_counter);

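/*
 * Reprogram a fixed counter from its 4-bit control field: bit 0 enables
 * OS (ring 0) counting, bit 1 enables user counting, bit 3 requests a PMI
 * on overflow.  The VM's PMU event filter can veto the counter via its
 * fixed_counter_bitmap.
 */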
void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
{
	unsigned en_field = ctrl & 0x3;
	bool pmi = ctrl & 0x8;
	struct kvm_pmu_event_filter *filter;
	struct kvm *kvm = pmc->vcpu->kvm;

	pmc_stop_counter(pmc);

	if (!en_field || !pmc_is_enabled(pmc))
		return;

	filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
	if (filter) {
		if (filter->action == KVM_PMU_EVENT_DENY &&
		    test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
			return;
		if (filter->action == KVM_PMU_EVENT_ALLOW &&
		    !test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
			return;
	}

	pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
			      kvm_x86_ops->pmu_ops->find_fixed_event(idx),
			      !(en_field & 0x2),
			      !(en_field & 0x1),
			      pmi, false, false);
}
EXPORT_SYMBOL_GPL(reprogram_fixed_counter);

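/*
 * Reprogram the counter at the given global index, dispatching to the GP
 * or fixed-counter path depending on the counter type.
 */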
void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx)
{
	struct kvm_pmc *pmc = kvm_x86_ops->pmu_ops->pmc_idx_to_pmc(pmu, pmc_idx);

	if (!pmc)
		return;

	if (pmc_is_gp(pmc))
		reprogram_gp_counter(pmc, pmc->eventsel);
	else {
		int idx = pmc_idx - INTEL_PMC_IDX_FIXED;
		u8 ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl, idx);

		reprogram_fixed_counter(pmc, ctrl, idx);
	}
}
EXPORT_SYMBOL_GPL(reprogram_counter);

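/*
 * Handle a deferred KVM_REQ_PMU: reprogram every counter whose bit is set
 * in pmu->reprogram_pmi (e.g. by the overflow callbacks above).
 */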
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	u64 bitmask;
	int bit;

	bitmask = pmu->reprogram_pmi;

	for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) {
		struct kvm_pmc *pmc = kvm_x86_ops->pmu_ops->pmc_idx_to_pmc(pmu, bit);

		if (unlikely(!pmc || !pmc->perf_event)) {
			clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi);
			continue;
		}

		reprogram_counter(pmu, bit);
	}
}

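/* Check whether @idx is a valid counter index for RDPMC emulation. */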
int kvm_pmu_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx)
{
	return kvm_x86_ops->pmu_ops->is_valid_msr_idx(vcpu, idx);
}

bool is_vmware_backdoor_pmc(u32 pmc_idx)
{
	switch (pmc_idx) {
	case VMWARE_BACKDOOR_PMC_HOST_TSC:
	case VMWARE_BACKDOOR_PMC_REAL_TIME:
	case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
		return true;
	}
	return false;
}

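/*
 * VMware-style backdoor "counters" do not count events; they return the
 * host TSC, the host boot-time clock, or the guest-apparent time (host
 * boot time plus the kvmclock offset), respectively.
 */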
static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
	u64 ctr_val;

	switch (idx) {
	case VMWARE_BACKDOOR_PMC_HOST_TSC:
		ctr_val = rdtsc();
		break;
	case VMWARE_BACKDOOR_PMC_REAL_TIME:
		ctr_val = ktime_get_boottime_ns();
		break;
	case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
		ctr_val = ktime_get_boottime_ns() +
			vcpu->kvm->arch.kvmclock_offset;
		break;
	default:
		return 1;
	}

	*data = ctr_val;
	return 0;
}

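/*
 * Emulate RDPMC: bit 31 of the index selects "fast" mode, which truncates
 * the result to 32 bits.  VMware backdoor indices are handled separately;
 * everything else is translated to a PMC by the vendor code.
 */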
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
	bool fast_mode = idx & (1u << 31);
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u64 mask = fast_mode ? ~0u : ~0ull;

	if (!pmu->version)
		return 1;

	if (is_vmware_backdoor_pmc(idx))
		return kvm_pmu_rdpmc_vmware(vcpu, idx, data);

	pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx, &mask);
	if (!pmc)
		return 1;

	*data = pmc_read_counter(pmc) & mask;
	return 0;
}

void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
{
	if (lapic_in_kernel(vcpu))
		kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
}

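/* The MSR accessors below simply dispatch to the vendor (Intel/AMD) pmu_ops. */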
bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
	return kvm_x86_ops->pmu_ops->is_valid_msr(vcpu, msr);
}

int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
{
	return kvm_x86_ops->pmu_ops->get_msr(vcpu, msr, data);
}

int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	return kvm_x86_ops->pmu_ops->set_msr(vcpu, msr_info);
}

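/*
 * Refresh the PMU model for this vcpu, e.g. after the guest's CPUID (and
 * with it the advertised PMU version or number of counters) has changed.
 */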
void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
{
	kvm_x86_ops->pmu_ops->refresh(vcpu);
}

void kvm_pmu_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

	irq_work_sync(&pmu->irq_work);
	kvm_x86_ops->pmu_ops->reset(vcpu);
}

void kvm_pmu_init(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);

	memset(pmu, 0, sizeof(*pmu));
	kvm_x86_ops->pmu_ops->init(vcpu);
	init_irq_work(&pmu->irq_work, kvm_pmi_trigger_fn);
	kvm_pmu_refresh(vcpu);
}

void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
{
	kvm_pmu_reset(vcpu);
}

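/*
 * KVM_SET_PMU_EVENT_FILTER: install a new PMU event filter for the VM.
 * The previous filter is swapped out under kvm->lock and freed after an
 * expedited SRCU grace period, so concurrent readers are not disturbed.
 */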
int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
{
	struct kvm_pmu_event_filter tmp, *filter;
	size_t size;
	int r;

	if (copy_from_user(&tmp, argp, sizeof(tmp)))
		return -EFAULT;

	if (tmp.action != KVM_PMU_EVENT_ALLOW &&
	    tmp.action != KVM_PMU_EVENT_DENY)
		return -EINVAL;

	if (tmp.flags != 0)
		return -EINVAL;

	if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS)
		return -E2BIG;

	size = struct_size(filter, events, tmp.nevents);
	filter = kmalloc(size, GFP_KERNEL_ACCOUNT);
	if (!filter)
		return -ENOMEM;

	r = -EFAULT;
	if (copy_from_user(filter, argp, size))
		goto cleanup;

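	/* Make sure the validated header (nevents etc.) cannot change between the two copies from user space. */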
	*filter = tmp;

	mutex_lock(&kvm->lock);
	rcu_swap_protected(kvm->arch.pmu_event_filter, filter,
			   mutex_is_locked(&kvm->lock));
	mutex_unlock(&kvm->lock);

	synchronize_srcu_expedited(&kvm->srcu);
	r = 0;
cleanup:
	kfree(filter);
	return r;
}