This source file includes the following definitions; a brief userspace usage sketch follows the list.
- kvm_no_compat_ioctl
- kvm_no_compat_open
- kvm_arch_mmu_notifier_invalidate_range
- kvm_is_zone_device_pfn
- kvm_is_reserved_pfn
- vcpu_load
- vcpu_put
- kvm_request_needs_ipi
- ack_flush
- kvm_kick_many_cpus
- kvm_make_vcpus_request_mask
- kvm_make_all_cpus_request
- kvm_flush_remote_tlbs
- kvm_reload_remote_mmus
- kvm_vcpu_init
- kvm_vcpu_uninit
- mmu_notifier_to_kvm
- kvm_mmu_notifier_invalidate_range
- kvm_mmu_notifier_change_pte
- kvm_mmu_notifier_invalidate_range_start
- kvm_mmu_notifier_invalidate_range_end
- kvm_mmu_notifier_clear_flush_young
- kvm_mmu_notifier_clear_young
- kvm_mmu_notifier_test_young
- kvm_mmu_notifier_release
- kvm_init_mmu_notifier
- kvm_init_mmu_notifier
- kvm_alloc_memslots
- kvm_destroy_dirty_bitmap
- kvm_free_memslot
- kvm_free_memslots
- kvm_destroy_vm_debugfs
- kvm_create_vm_debugfs
- kvm_arch_post_init_vm
- kvm_arch_pre_destroy_vm
- kvm_create_vm
- kvm_destroy_devices
- kvm_destroy_vm
- kvm_get_kvm
- kvm_put_kvm
- kvm_vm_release
- kvm_create_dirty_bitmap
- update_memslots
- check_memory_region_flags
- install_new_memslots
- __kvm_set_memory_region
- kvm_set_memory_region
- kvm_vm_ioctl_set_memory_region
- kvm_get_dirty_log
- kvm_get_dirty_log_protect
- kvm_clear_dirty_log_protect
- kvm_largepages_enabled
- kvm_disable_largepages
- gfn_to_memslot
- kvm_vcpu_gfn_to_memslot
- kvm_is_visible_gfn
- kvm_host_page_size
- memslot_is_readonly
- __gfn_to_hva_many
- gfn_to_hva_many
- gfn_to_hva_memslot
- gfn_to_hva
- kvm_vcpu_gfn_to_hva
- gfn_to_hva_memslot_prot
- gfn_to_hva_prot
- kvm_vcpu_gfn_to_hva_prot
- check_user_page_hwpoison
- hva_to_pfn_fast
- hva_to_pfn_slow
- vma_is_valid
- hva_to_pfn_remapped
- hva_to_pfn
- __gfn_to_pfn_memslot
- gfn_to_pfn_prot
- gfn_to_pfn_memslot
- gfn_to_pfn_memslot_atomic
- gfn_to_pfn_atomic
- kvm_vcpu_gfn_to_pfn_atomic
- gfn_to_pfn
- kvm_vcpu_gfn_to_pfn
- gfn_to_page_many_atomic
- kvm_pfn_to_page
- gfn_to_page
- kvm_release_pfn
- kvm_cache_gfn_to_pfn
- __kvm_map_gfn
- kvm_map_gfn
- kvm_vcpu_map
- __kvm_unmap_gfn
- kvm_unmap_gfn
- kvm_vcpu_unmap
- kvm_vcpu_gfn_to_page
- kvm_release_page_clean
- kvm_release_pfn_clean
- kvm_release_page_dirty
- kvm_release_pfn_dirty
- kvm_set_pfn_dirty
- kvm_set_pfn_accessed
- kvm_get_pfn
- next_segment
- __kvm_read_guest_page
- kvm_read_guest_page
- kvm_vcpu_read_guest_page
- kvm_read_guest
- kvm_vcpu_read_guest
- __kvm_read_guest_atomic
- kvm_read_guest_atomic
- kvm_vcpu_read_guest_atomic
- __kvm_write_guest_page
- kvm_write_guest_page
- kvm_vcpu_write_guest_page
- kvm_write_guest
- kvm_vcpu_write_guest
- __kvm_gfn_to_hva_cache_init
- kvm_gfn_to_hva_cache_init
- kvm_write_guest_offset_cached
- kvm_write_guest_cached
- kvm_read_guest_cached
- kvm_clear_guest_page
- kvm_clear_guest
- mark_page_dirty_in_slot
- mark_page_dirty
- kvm_vcpu_mark_page_dirty
- kvm_sigset_activate
- kvm_sigset_deactivate
- grow_halt_poll_ns
- shrink_halt_poll_ns
- kvm_vcpu_check_block
- kvm_vcpu_block
- kvm_vcpu_wake_up
- kvm_vcpu_kick
- kvm_vcpu_yield_to
- kvm_vcpu_eligible_for_directed_yield
- kvm_arch_dy_runnable
- vcpu_dy_runnable
- kvm_vcpu_on_spin
- kvm_vcpu_fault
- kvm_vcpu_mmap
- kvm_vcpu_release
- create_vcpu_fd
- kvm_create_vcpu_debugfs
- kvm_vm_ioctl_create_vcpu
- kvm_vcpu_ioctl_set_sigmask
- kvm_vcpu_ioctl
- kvm_vcpu_compat_ioctl
- kvm_device_mmap
- kvm_device_ioctl_attr
- kvm_device_ioctl
- kvm_device_release
- kvm_device_from_filp
- kvm_register_device_ops
- kvm_unregister_device_ops
- kvm_ioctl_create_device
- kvm_vm_ioctl_check_extension_generic
- kvm_vm_ioctl_enable_cap
- kvm_vm_ioctl_enable_cap_generic
- kvm_vm_ioctl
- kvm_vm_compat_ioctl
- kvm_dev_ioctl_create_vm
- kvm_dev_ioctl
- hardware_enable_nolock
- kvm_starting_cpu
- hardware_disable_nolock
- kvm_dying_cpu
- hardware_disable_all_nolock
- hardware_disable_all
- hardware_enable_all
- kvm_reboot
- kvm_io_bus_destroy
- kvm_io_bus_cmp
- kvm_io_bus_sort_cmp
- kvm_io_bus_get_first_dev
- __kvm_io_bus_write
- kvm_io_bus_write
- kvm_io_bus_write_cookie
- __kvm_io_bus_read
- kvm_io_bus_read
- kvm_io_bus_register_dev
- kvm_io_bus_unregister_dev
- kvm_io_bus_get_dev
- kvm_debugfs_open
- kvm_debugfs_release
- vm_stat_get_per_vm
- vm_stat_clear_per_vm
- vm_stat_get_per_vm_open
- vcpu_stat_get_per_vm
- vcpu_stat_clear_per_vm
- vcpu_stat_get_per_vm_open
- vm_stat_get
- vm_stat_clear
- vcpu_stat_get
- vcpu_stat_clear
- kvm_uevent_notify_change
- kvm_init_debug
- kvm_suspend
- kvm_resume
- preempt_notifier_to_vcpu
- kvm_sched_in
- kvm_sched_out
- check_processor_compat
- kvm_init
- kvm_exit
- kvm_vm_worker_thread
- kvm_vm_create_worker_thread
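
The definitions above make up the generic KVM core: the /dev/kvm character device, the VM and vCPU file descriptors with their ioctl handlers, the memslot machinery, and the gfn/hva/pfn translation and guest read/write helpers. As a rough orientation aid, the sketch below shows how userspace typically reaches these entry points (kvm_dev_ioctl, kvm_vm_ioctl, kvm_vm_ioctl_set_memory_region, kvm_vm_ioctl_create_vcpu, kvm_vcpu_mmap, kvm_vcpu_ioctl). It is not taken from this file; it assumes the standard <linux/kvm.h> ABI (KVM_CREATE_VM, KVM_SET_USER_MEMORY_REGION, KVM_CREATE_VCPU, KVM_GET_VCPU_MMAP_SIZE, KVM_RUN), omits error handling, and leaves guest memory zeroed, so what the first KVM_RUN exit looks like depends on the architecture.

/* Minimal userspace sketch (not part of kvm_main.c); see the lead-in above. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);           /* served by kvm_dev_ioctl() */
	int vm  = ioctl(kvm, KVM_CREATE_VM, 0);                   /* kvm_dev_ioctl_create_vm() */

	/* Back guest physical range [0, 2 MiB) with anonymous host memory. */
	void *mem = mmap(NULL, 0x200000, PROT_READ | PROT_WRITE,
			 MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	struct kvm_userspace_memory_region region = {
		.slot            = 0,
		.guest_phys_addr = 0,
		.memory_size     = 0x200000,
		.userspace_addr  = (unsigned long)mem,
	};
	ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region);           /* __kvm_set_memory_region() */

	int vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);                 /* kvm_vm_ioctl_create_vcpu() */
	int run_size = ioctl(kvm, KVM_GET_VCPU_MMAP_SIZE, 0);
	struct kvm_run *run = mmap(NULL, run_size, PROT_READ | PROT_WRITE,
				   MAP_SHARED, vcpu, 0);           /* kvm_vcpu_mmap() */

	ioctl(vcpu, KVM_RUN, 0);                                  /* kvm_vcpu_ioctl() */
	printf("exit_reason = %u\n", run->exit_reason);
	return 0;
}
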
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 #include <kvm/iodev.h>
17
18 #include <linux/kvm_host.h>
19 #include <linux/kvm.h>
20 #include <linux/module.h>
21 #include <linux/errno.h>
22 #include <linux/percpu.h>
23 #include <linux/mm.h>
24 #include <linux/miscdevice.h>
25 #include <linux/vmalloc.h>
26 #include <linux/reboot.h>
27 #include <linux/debugfs.h>
28 #include <linux/highmem.h>
29 #include <linux/file.h>
30 #include <linux/syscore_ops.h>
31 #include <linux/cpu.h>
32 #include <linux/sched/signal.h>
33 #include <linux/sched/mm.h>
34 #include <linux/sched/stat.h>
35 #include <linux/cpumask.h>
36 #include <linux/smp.h>
37 #include <linux/anon_inodes.h>
38 #include <linux/profile.h>
39 #include <linux/kvm_para.h>
40 #include <linux/pagemap.h>
41 #include <linux/mman.h>
42 #include <linux/swap.h>
43 #include <linux/bitops.h>
44 #include <linux/spinlock.h>
45 #include <linux/compat.h>
46 #include <linux/srcu.h>
47 #include <linux/hugetlb.h>
48 #include <linux/slab.h>
49 #include <linux/sort.h>
50 #include <linux/bsearch.h>
51 #include <linux/io.h>
52 #include <linux/lockdep.h>
53 #include <linux/kthread.h>
54
55 #include <asm/processor.h>
56 #include <asm/ioctl.h>
57 #include <linux/uaccess.h>
58 #include <asm/pgtable.h>
59
60 #include "coalesced_mmio.h"
61 #include "async_pf.h"
62 #include "vfio.h"
63
64 #define CREATE_TRACE_POINTS
65 #include <trace/events/kvm.h>
66
67
68 #define ITOA_MAX_LEN 12
69
70 MODULE_AUTHOR("Qumranet");
71 MODULE_LICENSE("GPL");
72
73
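/*
 * Module parameters below tune halt polling: halt_poll_ns is the per-vCPU
 * upper bound on the busy-wait window before actually blocking,
 * halt_poll_ns_grow and halt_poll_ns_grow_start control how
 * grow_halt_poll_ns() (later in this file) multiplies the window, and
 * halt_poll_ns_shrink controls how shrink_halt_poll_ns() divides it
 * (a value of 0 resets the window to 0).
 */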
74 unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT;
75 module_param(halt_poll_ns, uint, 0644);
76 EXPORT_SYMBOL_GPL(halt_poll_ns);
77
78
79 unsigned int halt_poll_ns_grow = 2;
80 module_param(halt_poll_ns_grow, uint, 0644);
81 EXPORT_SYMBOL_GPL(halt_poll_ns_grow);
82
83
84 unsigned int halt_poll_ns_grow_start = 10000;
85 module_param(halt_poll_ns_grow_start, uint, 0644);
86 EXPORT_SYMBOL_GPL(halt_poll_ns_grow_start);
87
88
89 unsigned int halt_poll_ns_shrink;
90 module_param(halt_poll_ns_shrink, uint, 0644);
91 EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);
92
93
94
95
96
97
98
99 DEFINE_MUTEX(kvm_lock);
100 static DEFINE_RAW_SPINLOCK(kvm_count_lock);
101 LIST_HEAD(vm_list);
102
103 static cpumask_var_t cpus_hardware_enabled;
104 static int kvm_usage_count;
105 static atomic_t hardware_enable_failed;
106
107 struct kmem_cache *kvm_vcpu_cache;
108 EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
109
110 static __read_mostly struct preempt_ops kvm_preempt_ops;
111
112 struct dentry *kvm_debugfs_dir;
113 EXPORT_SYMBOL_GPL(kvm_debugfs_dir);
114
115 static int kvm_debugfs_num_entries;
116 static const struct file_operations *stat_fops_per_vm[];
117
118 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
119 unsigned long arg);
120 #ifdef CONFIG_KVM_COMPAT
121 static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl,
122 unsigned long arg);
123 #define KVM_COMPAT(c) .compat_ioctl = (c)
124 #else
125
126
127
128
129
130
131
132 static long kvm_no_compat_ioctl(struct file *file, unsigned int ioctl,
133 unsigned long arg) { return -EINVAL; }
134
135 static int kvm_no_compat_open(struct inode *inode, struct file *file)
136 {
137 return is_compat_task() ? -ENODEV : 0;
138 }
139 #define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl, \
140 .open = kvm_no_compat_open
141 #endif
142 static int hardware_enable_all(void);
143 static void hardware_disable_all(void);
144
145 static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
146
147 static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn);
148
149 __visible bool kvm_rebooting;
150 EXPORT_SYMBOL_GPL(kvm_rebooting);
151
152 static bool largepages_enabled = true;
153
154 #define KVM_EVENT_CREATE_VM 0
155 #define KVM_EVENT_DESTROY_VM 1
156 static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm);
157 static unsigned long long kvm_createvm_count;
158 static unsigned long long kvm_active_vms;
159
160 __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
161 unsigned long start, unsigned long end)
162 {
163 }
164
165 bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
166 {
167
168
169
170
171
172
173 if (!pfn_valid(pfn) || WARN_ON_ONCE(!page_count(pfn_to_page(pfn))))
174 return false;
175
176 return is_zone_device_page(pfn_to_page(pfn));
177 }
178
179 bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
180 {
181
182
183
184
185
186 if (pfn_valid(pfn))
187 return PageReserved(pfn_to_page(pfn)) &&
188 !kvm_is_zone_device_pfn(pfn);
189
190 return true;
191 }
192
193
194
195
196 void vcpu_load(struct kvm_vcpu *vcpu)
197 {
198 int cpu = get_cpu();
199 preempt_notifier_register(&vcpu->preempt_notifier);
200 kvm_arch_vcpu_load(vcpu, cpu);
201 put_cpu();
202 }
203 EXPORT_SYMBOL_GPL(vcpu_load);
204
205 void vcpu_put(struct kvm_vcpu *vcpu)
206 {
207 preempt_disable();
208 kvm_arch_vcpu_put(vcpu);
209 preempt_notifier_unregister(&vcpu->preempt_notifier);
210 preempt_enable();
211 }
212 EXPORT_SYMBOL_GPL(vcpu_put);
213
214
215 static bool kvm_request_needs_ipi(struct kvm_vcpu *vcpu, unsigned req)
216 {
217 int mode = kvm_vcpu_exiting_guest_mode(vcpu);
218
219
220
221
222
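/*
 * Requests flagged KVM_REQUEST_WAIT need the remote CPU to acknowledge, so
 * an IPI is required unless the vCPU is fully outside guest mode; other
 * requests only need to kick a vCPU that is currently running guest code
 * (IN_GUEST_MODE).
 */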
223 if (req & KVM_REQUEST_WAIT)
224 return mode != OUTSIDE_GUEST_MODE;
225
226
227
228
229 return mode == IN_GUEST_MODE;
230 }
231
232 static void ack_flush(void *_completed)
233 {
234 }
235
236 static inline bool kvm_kick_many_cpus(const struct cpumask *cpus, bool wait)
237 {
238 if (unlikely(!cpus))
239 cpus = cpu_online_mask;
240
241 if (cpumask_empty(cpus))
242 return false;
243
244 smp_call_function_many(cpus, ack_flush, NULL, wait);
245 return true;
246 }
247
248 bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
249 unsigned long *vcpu_bitmap, cpumask_var_t tmp)
250 {
251 int i, cpu, me;
252 struct kvm_vcpu *vcpu;
253 bool called;
254
255 me = get_cpu();
256
257 kvm_for_each_vcpu(i, vcpu, kvm) {
258 if (vcpu_bitmap && !test_bit(i, vcpu_bitmap))
259 continue;
260
261 kvm_make_request(req, vcpu);
262 cpu = vcpu->cpu;
263
264 if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu))
265 continue;
266
267 if (tmp != NULL && cpu != -1 && cpu != me &&
268 kvm_request_needs_ipi(vcpu, req))
269 __cpumask_set_cpu(cpu, tmp);
270 }
271
272 called = kvm_kick_many_cpus(tmp, !!(req & KVM_REQUEST_WAIT));
273 put_cpu();
274
275 return called;
276 }
277
278 bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
279 {
280 cpumask_var_t cpus;
281 bool called;
282
283 zalloc_cpumask_var(&cpus, GFP_ATOMIC);
284
285 called = kvm_make_vcpus_request_mask(kvm, req, NULL, cpus);
286
287 free_cpumask_var(cpus);
288 return called;
289 }
290
291 #ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
292 void kvm_flush_remote_tlbs(struct kvm *kvm)
293 {
294
295
296
297
298 long dirty_count = smp_load_acquire(&kvm->tlbs_dirty);
299
300
301
302
303
304
305
306
307
308
309
310
311 if (!kvm_arch_flush_remote_tlb(kvm)
312 || kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
313 ++kvm->stat.remote_tlb_flush;
314 cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
315 }
316 EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
317 #endif
318
319 void kvm_reload_remote_mmus(struct kvm *kvm)
320 {
321 kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
322 }
323
324 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
325 {
326 struct page *page;
327 int r;
328
329 mutex_init(&vcpu->mutex);
330 vcpu->cpu = -1;
331 vcpu->kvm = kvm;
332 vcpu->vcpu_id = id;
333 vcpu->pid = NULL;
334 init_swait_queue_head(&vcpu->wq);
335 kvm_async_pf_vcpu_init(vcpu);
336
337 vcpu->pre_pcpu = -1;
338 INIT_LIST_HEAD(&vcpu->blocked_vcpu_list);
339
340 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
341 if (!page) {
342 r = -ENOMEM;
343 goto fail;
344 }
345 vcpu->run = page_address(page);
346
347 kvm_vcpu_set_in_spin_loop(vcpu, false);
348 kvm_vcpu_set_dy_eligible(vcpu, false);
349 vcpu->preempted = false;
350 vcpu->ready = false;
351
352 r = kvm_arch_vcpu_init(vcpu);
353 if (r < 0)
354 goto fail_free_run;
355 return 0;
356
357 fail_free_run:
358 free_page((unsigned long)vcpu->run);
359 fail:
360 return r;
361 }
362 EXPORT_SYMBOL_GPL(kvm_vcpu_init);
363
364 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
365 {
366
367
368
369
370
371 put_pid(rcu_dereference_protected(vcpu->pid, 1));
372 kvm_arch_vcpu_uninit(vcpu);
373 free_page((unsigned long)vcpu->run);
374 }
375 EXPORT_SYMBOL_GPL(kvm_vcpu_uninit);
376
377 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
378 static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
379 {
380 return container_of(mn, struct kvm, mmu_notifier);
381 }
382
383 static void kvm_mmu_notifier_invalidate_range(struct mmu_notifier *mn,
384 struct mm_struct *mm,
385 unsigned long start, unsigned long end)
386 {
387 struct kvm *kvm = mmu_notifier_to_kvm(mn);
388 int idx;
389
390 idx = srcu_read_lock(&kvm->srcu);
391 kvm_arch_mmu_notifier_invalidate_range(kvm, start, end);
392 srcu_read_unlock(&kvm->srcu, idx);
393 }
394
395 static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
396 struct mm_struct *mm,
397 unsigned long address,
398 pte_t pte)
399 {
400 struct kvm *kvm = mmu_notifier_to_kvm(mn);
401 int idx;
402
403 idx = srcu_read_lock(&kvm->srcu);
404 spin_lock(&kvm->mmu_lock);
405 kvm->mmu_notifier_seq++;
406
407 if (kvm_set_spte_hva(kvm, address, pte))
408 kvm_flush_remote_tlbs(kvm);
409
410 spin_unlock(&kvm->mmu_lock);
411 srcu_read_unlock(&kvm->srcu, idx);
412 }
413
414 static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
415 const struct mmu_notifier_range *range)
416 {
417 struct kvm *kvm = mmu_notifier_to_kvm(mn);
418 int need_tlb_flush = 0, idx;
419
420 idx = srcu_read_lock(&kvm->srcu);
421 spin_lock(&kvm->mmu_lock);
422
423
424
425
426
427 kvm->mmu_notifier_count++;
428 need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end);
429 need_tlb_flush |= kvm->tlbs_dirty;
430
431 if (need_tlb_flush)
432 kvm_flush_remote_tlbs(kvm);
433
434 spin_unlock(&kvm->mmu_lock);
435 srcu_read_unlock(&kvm->srcu, idx);
436
437 return 0;
438 }
439
440 static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
441 const struct mmu_notifier_range *range)
442 {
443 struct kvm *kvm = mmu_notifier_to_kvm(mn);
444
445 spin_lock(&kvm->mmu_lock);
446
447
448
449
450
451 kvm->mmu_notifier_seq++;
452 smp_wmb();
453
454
455
456
457
458 kvm->mmu_notifier_count--;
459 spin_unlock(&kvm->mmu_lock);
460
461 BUG_ON(kvm->mmu_notifier_count < 0);
462 }
463
464 static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
465 struct mm_struct *mm,
466 unsigned long start,
467 unsigned long end)
468 {
469 struct kvm *kvm = mmu_notifier_to_kvm(mn);
470 int young, idx;
471
472 idx = srcu_read_lock(&kvm->srcu);
473 spin_lock(&kvm->mmu_lock);
474
475 young = kvm_age_hva(kvm, start, end);
476 if (young)
477 kvm_flush_remote_tlbs(kvm);
478
479 spin_unlock(&kvm->mmu_lock);
480 srcu_read_unlock(&kvm->srcu, idx);
481
482 return young;
483 }
484
485 static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
486 struct mm_struct *mm,
487 unsigned long start,
488 unsigned long end)
489 {
490 struct kvm *kvm = mmu_notifier_to_kvm(mn);
491 int young, idx;
492
493 idx = srcu_read_lock(&kvm->srcu);
494 spin_lock(&kvm->mmu_lock);
495
496
497
498
499
500
501
502
503
504
505
506
507
508 young = kvm_age_hva(kvm, start, end);
509 spin_unlock(&kvm->mmu_lock);
510 srcu_read_unlock(&kvm->srcu, idx);
511
512 return young;
513 }
514
515 static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
516 struct mm_struct *mm,
517 unsigned long address)
518 {
519 struct kvm *kvm = mmu_notifier_to_kvm(mn);
520 int young, idx;
521
522 idx = srcu_read_lock(&kvm->srcu);
523 spin_lock(&kvm->mmu_lock);
524 young = kvm_test_age_hva(kvm, address);
525 spin_unlock(&kvm->mmu_lock);
526 srcu_read_unlock(&kvm->srcu, idx);
527
528 return young;
529 }
530
531 static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
532 struct mm_struct *mm)
533 {
534 struct kvm *kvm = mmu_notifier_to_kvm(mn);
535 int idx;
536
537 idx = srcu_read_lock(&kvm->srcu);
538 kvm_arch_flush_shadow_all(kvm);
539 srcu_read_unlock(&kvm->srcu, idx);
540 }
541
542 static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
543 .invalidate_range = kvm_mmu_notifier_invalidate_range,
544 .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
545 .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end,
546 .clear_flush_young = kvm_mmu_notifier_clear_flush_young,
547 .clear_young = kvm_mmu_notifier_clear_young,
548 .test_young = kvm_mmu_notifier_test_young,
549 .change_pte = kvm_mmu_notifier_change_pte,
550 .release = kvm_mmu_notifier_release,
551 };
552
553 static int kvm_init_mmu_notifier(struct kvm *kvm)
554 {
555 kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
556 return mmu_notifier_register(&kvm->mmu_notifier, current->mm);
557 }
558
559 #else
560
561 static int kvm_init_mmu_notifier(struct kvm *kvm)
562 {
563 return 0;
564 }
565
566 #endif
567
568 static struct kvm_memslots *kvm_alloc_memslots(void)
569 {
570 int i;
571 struct kvm_memslots *slots;
572
573 slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT);
574 if (!slots)
575 return NULL;
576
577 for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
578 slots->id_to_index[i] = slots->memslots[i].id = i;
579
580 return slots;
581 }
582
583 static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
584 {
585 if (!memslot->dirty_bitmap)
586 return;
587
588 kvfree(memslot->dirty_bitmap);
589 memslot->dirty_bitmap = NULL;
590 }
591
592
593
594
595 static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
596 struct kvm_memory_slot *dont)
597 {
598 if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
599 kvm_destroy_dirty_bitmap(free);
600
601 kvm_arch_free_memslot(kvm, free, dont);
602
603 free->npages = 0;
604 }
605
606 static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots)
607 {
608 struct kvm_memory_slot *memslot;
609
610 if (!slots)
611 return;
612
613 kvm_for_each_memslot(memslot, slots)
614 kvm_free_memslot(kvm, memslot, NULL);
615
616 kvfree(slots);
617 }
618
619 static void kvm_destroy_vm_debugfs(struct kvm *kvm)
620 {
621 int i;
622
623 if (!kvm->debugfs_dentry)
624 return;
625
626 debugfs_remove_recursive(kvm->debugfs_dentry);
627
628 if (kvm->debugfs_stat_data) {
629 for (i = 0; i < kvm_debugfs_num_entries; i++)
630 kfree(kvm->debugfs_stat_data[i]);
631 kfree(kvm->debugfs_stat_data);
632 }
633 }
634
635 static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
636 {
637 char dir_name[ITOA_MAX_LEN * 2];
638 struct kvm_stat_data *stat_data;
639 struct kvm_stats_debugfs_item *p;
640
641 if (!debugfs_initialized())
642 return 0;
643
644 snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd);
645 kvm->debugfs_dentry = debugfs_create_dir(dir_name, kvm_debugfs_dir);
646
647 kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries,
648 sizeof(*kvm->debugfs_stat_data),
649 GFP_KERNEL_ACCOUNT);
650 if (!kvm->debugfs_stat_data)
651 return -ENOMEM;
652
653 for (p = debugfs_entries; p->name; p++) {
654 stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT);
655 if (!stat_data)
656 return -ENOMEM;
657
658 stat_data->kvm = kvm;
659 stat_data->offset = p->offset;
660 stat_data->mode = p->mode ? p->mode : 0644;
661 kvm->debugfs_stat_data[p - debugfs_entries] = stat_data;
662 debugfs_create_file(p->name, stat_data->mode, kvm->debugfs_dentry,
663 stat_data, stat_fops_per_vm[p->kind]);
664 }
665 return 0;
666 }
667
668
669
670
671
672 int __weak kvm_arch_post_init_vm(struct kvm *kvm)
673 {
674 return 0;
675 }
676
677
678
679
680
681 void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm)
682 {
683 }
684
685 static struct kvm *kvm_create_vm(unsigned long type)
686 {
687 struct kvm *kvm = kvm_arch_alloc_vm();
688 int r = -ENOMEM;
689 int i;
690
691 if (!kvm)
692 return ERR_PTR(-ENOMEM);
693
694 spin_lock_init(&kvm->mmu_lock);
695 mmgrab(current->mm);
696 kvm->mm = current->mm;
697 kvm_eventfd_init(kvm);
698 mutex_init(&kvm->lock);
699 mutex_init(&kvm->irq_lock);
700 mutex_init(&kvm->slots_lock);
701 INIT_LIST_HEAD(&kvm->devices);
702
703 BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
704
705 if (init_srcu_struct(&kvm->srcu))
706 goto out_err_no_srcu;
707 if (init_srcu_struct(&kvm->irq_srcu))
708 goto out_err_no_irq_srcu;
709
710 refcount_set(&kvm->users_count, 1);
711 for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
712 struct kvm_memslots *slots = kvm_alloc_memslots();
713
714 if (!slots)
715 goto out_err_no_arch_destroy_vm;
716
717 slots->generation = i;
718 rcu_assign_pointer(kvm->memslots[i], slots);
719 }
720
721 for (i = 0; i < KVM_NR_BUSES; i++) {
722 rcu_assign_pointer(kvm->buses[i],
723 kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT));
724 if (!kvm->buses[i])
725 goto out_err_no_arch_destroy_vm;
726 }
727
728 r = kvm_arch_init_vm(kvm, type);
729 if (r)
730 goto out_err_no_arch_destroy_vm;
731
732 r = hardware_enable_all();
733 if (r)
734 goto out_err_no_disable;
735
736 #ifdef CONFIG_HAVE_KVM_IRQFD
737 INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
738 #endif
739
740 r = kvm_init_mmu_notifier(kvm);
741 if (r)
742 goto out_err_no_mmu_notifier;
743
744 r = kvm_arch_post_init_vm(kvm);
745 if (r)
746 goto out_err;
747
748 mutex_lock(&kvm_lock);
749 list_add(&kvm->vm_list, &vm_list);
750 mutex_unlock(&kvm_lock);
751
752 preempt_notifier_inc();
753
754 return kvm;
755
756 out_err:
757 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
758 if (kvm->mmu_notifier.ops)
759 mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);
760 #endif
761 out_err_no_mmu_notifier:
762 hardware_disable_all();
763 out_err_no_disable:
764 kvm_arch_destroy_vm(kvm);
765 out_err_no_arch_destroy_vm:
766 WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count));
767 for (i = 0; i < KVM_NR_BUSES; i++)
768 kfree(kvm_get_bus(kvm, i));
769 for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
770 kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
771 cleanup_srcu_struct(&kvm->irq_srcu);
772 out_err_no_irq_srcu:
773 cleanup_srcu_struct(&kvm->srcu);
774 out_err_no_srcu:
775 kvm_arch_free_vm(kvm);
776 mmdrop(current->mm);
777 return ERR_PTR(r);
778 }
779
780 static void kvm_destroy_devices(struct kvm *kvm)
781 {
782 struct kvm_device *dev, *tmp;
783
784
785
786
787
788
789 list_for_each_entry_safe(dev, tmp, &kvm->devices, vm_node) {
790 list_del(&dev->vm_node);
791 dev->ops->destroy(dev);
792 }
793 }
794
795 static void kvm_destroy_vm(struct kvm *kvm)
796 {
797 int i;
798 struct mm_struct *mm = kvm->mm;
799
800 kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm);
801 kvm_destroy_vm_debugfs(kvm);
802 kvm_arch_sync_events(kvm);
803 mutex_lock(&kvm_lock);
804 list_del(&kvm->vm_list);
805 mutex_unlock(&kvm_lock);
806 kvm_arch_pre_destroy_vm(kvm);
807
808 kvm_free_irq_routing(kvm);
809 for (i = 0; i < KVM_NR_BUSES; i++) {
810 struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
811
812 if (bus)
813 kvm_io_bus_destroy(bus);
814 kvm->buses[i] = NULL;
815 }
816 kvm_coalesced_mmio_free(kvm);
817 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
818 mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
819 #else
820 kvm_arch_flush_shadow_all(kvm);
821 #endif
822 kvm_arch_destroy_vm(kvm);
823 kvm_destroy_devices(kvm);
824 for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
825 kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
826 cleanup_srcu_struct(&kvm->irq_srcu);
827 cleanup_srcu_struct(&kvm->srcu);
828 kvm_arch_free_vm(kvm);
829 preempt_notifier_dec();
830 hardware_disable_all();
831 mmdrop(mm);
832 }
833
834 void kvm_get_kvm(struct kvm *kvm)
835 {
836 refcount_inc(&kvm->users_count);
837 }
838 EXPORT_SYMBOL_GPL(kvm_get_kvm);
839
840 void kvm_put_kvm(struct kvm *kvm)
841 {
842 if (refcount_dec_and_test(&kvm->users_count))
843 kvm_destroy_vm(kvm);
844 }
845 EXPORT_SYMBOL_GPL(kvm_put_kvm);
846
847
848 static int kvm_vm_release(struct inode *inode, struct file *filp)
849 {
850 struct kvm *kvm = filp->private_data;
851
852 kvm_irqfd_release(kvm);
853
854 kvm_put_kvm(kvm);
855 return 0;
856 }
857
858
859
860
861
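/*
 * The allocation is twice the size of the dirty bitmap itself: the second
 * half is the scratch area returned by kvm_second_dirty_bitmap() and used
 * by the dirty-log get/clear paths further down when copying to and from
 * userspace.
 */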
862 static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
863 {
864 unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
865
866 memslot->dirty_bitmap = kvzalloc(dirty_bytes, GFP_KERNEL_ACCOUNT);
867 if (!memslot->dirty_bitmap)
868 return -ENOMEM;
869
870 return 0;
871 }
872
873
874
875
876
877
878
879 static void update_memslots(struct kvm_memslots *slots,
880 struct kvm_memory_slot *new,
881 enum kvm_mr_change change)
882 {
883 int id = new->id;
884 int i = slots->id_to_index[id];
885 struct kvm_memory_slot *mslots = slots->memslots;
886
887 WARN_ON(mslots[i].id != id);
888 switch (change) {
889 case KVM_MR_CREATE:
890 slots->used_slots++;
891 WARN_ON(mslots[i].npages || !new->npages);
892 break;
893 case KVM_MR_DELETE:
894 slots->used_slots--;
895 WARN_ON(new->npages || !mslots[i].npages);
896 break;
897 default:
898 break;
899 }
900
901 while (i < KVM_MEM_SLOTS_NUM - 1 &&
902 new->base_gfn <= mslots[i + 1].base_gfn) {
903 if (!mslots[i + 1].npages)
904 break;
905 mslots[i] = mslots[i + 1];
906 slots->id_to_index[mslots[i].id] = i;
907 i++;
908 }
909
910
911
912
913
914
915
916
917
918
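/*
 * The array is kept sorted by base_gfn in descending order with empty
 * (npages == 0) slots packed at the end.  A non-empty new slot is shifted
 * toward the front below until that order is restored; a deleted slot must
 * end up exactly at index used_slots, which the WARN_ON_ONCE checks.
 */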
919 if (new->npages) {
920 while (i > 0 &&
921 new->base_gfn >= mslots[i - 1].base_gfn) {
922 mslots[i] = mslots[i - 1];
923 slots->id_to_index[mslots[i].id] = i;
924 i--;
925 }
926 } else
927 WARN_ON_ONCE(i != slots->used_slots);
928
929 mslots[i] = *new;
930 slots->id_to_index[mslots[i].id] = i;
931 }
932
933 static int check_memory_region_flags(const struct kvm_userspace_memory_region *mem)
934 {
935 u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
936
937 #ifdef __KVM_HAVE_READONLY_MEM
938 valid_flags |= KVM_MEM_READONLY;
939 #endif
940
941 if (mem->flags & ~valid_flags)
942 return -EINVAL;
943
944 return 0;
945 }
946
947 static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
948 int as_id, struct kvm_memslots *slots)
949 {
950 struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id);
951 u64 gen = old_memslots->generation;
952
953 WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
954 slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;
955
956 rcu_assign_pointer(kvm->memslots[as_id], slots);
957 synchronize_srcu_expedited(&kvm->srcu);
958
959
960
961
962
963
964
965 gen = slots->generation & ~KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;
966
967
968
969
970
971
972
973
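/*
 * Each address space's memslots start at a distinct generation (see
 * kvm_create_vm) and are advanced by KVM_ADDRESS_SPACE_NUM here, keeping
 * the generation numbers of different address spaces disjoint so that a
 * lookup cached under one address space's generation can never match
 * another's.
 */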
974 gen += KVM_ADDRESS_SPACE_NUM;
975
976 kvm_arch_memslots_updated(kvm, gen);
977
978 slots->generation = gen;
979
980 return old_memslots;
981 }
982
983
984
985
986
987
988
989
990
991 int __kvm_set_memory_region(struct kvm *kvm,
992 const struct kvm_userspace_memory_region *mem)
993 {
994 int r;
995 gfn_t base_gfn;
996 unsigned long npages;
997 struct kvm_memory_slot *slot;
998 struct kvm_memory_slot old, new;
999 struct kvm_memslots *slots = NULL, *old_memslots;
1000 int as_id, id;
1001 enum kvm_mr_change change;
1002
1003 r = check_memory_region_flags(mem);
1004 if (r)
1005 goto out;
1006
1007 r = -EINVAL;
1008 as_id = mem->slot >> 16;
1009 id = (u16)mem->slot;
1010
1011
1012 if (mem->memory_size & (PAGE_SIZE - 1))
1013 goto out;
1014 if (mem->guest_phys_addr & (PAGE_SIZE - 1))
1015 goto out;
1016
1017 if ((id < KVM_USER_MEM_SLOTS) &&
1018 ((mem->userspace_addr & (PAGE_SIZE - 1)) ||
1019 !access_ok((void __user *)(unsigned long)mem->userspace_addr,
1020 mem->memory_size)))
1021 goto out;
1022 if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_MEM_SLOTS_NUM)
1023 goto out;
1024 if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
1025 goto out;
1026
1027 slot = id_to_memslot(__kvm_memslots(kvm, as_id), id);
1028 base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
1029 npages = mem->memory_size >> PAGE_SHIFT;
1030
1031 if (npages > KVM_MEM_MAX_NR_PAGES)
1032 goto out;
1033
1034 new = old = *slot;
1035
1036 new.id = id;
1037 new.base_gfn = base_gfn;
1038 new.npages = npages;
1039 new.flags = mem->flags;
1040
1041 if (npages) {
1042 if (!old.npages)
1043 change = KVM_MR_CREATE;
1044 else {
1045 if ((mem->userspace_addr != old.userspace_addr) ||
1046 (npages != old.npages) ||
1047 ((new.flags ^ old.flags) & KVM_MEM_READONLY))
1048 goto out;
1049
1050 if (base_gfn != old.base_gfn)
1051 change = KVM_MR_MOVE;
1052 else if (new.flags != old.flags)
1053 change = KVM_MR_FLAGS_ONLY;
1054 else {
1055 r = 0;
1056 goto out;
1057 }
1058 }
1059 } else {
1060 if (!old.npages)
1061 goto out;
1062
1063 change = KVM_MR_DELETE;
1064 new.base_gfn = 0;
1065 new.flags = 0;
1066 }
1067
1068 if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
1069
1070 r = -EEXIST;
1071 kvm_for_each_memslot(slot, __kvm_memslots(kvm, as_id)) {
1072 if (slot->id == id)
1073 continue;
1074 if (!((base_gfn + npages <= slot->base_gfn) ||
1075 (base_gfn >= slot->base_gfn + slot->npages)))
1076 goto out;
1077 }
1078 }
1079
1080
1081 if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
1082 new.dirty_bitmap = NULL;
1083
1084 r = -ENOMEM;
1085 if (change == KVM_MR_CREATE) {
1086 new.userspace_addr = mem->userspace_addr;
1087
1088 if (kvm_arch_create_memslot(kvm, &new, npages))
1089 goto out_free;
1090 }
1091
1092
1093 if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
1094 if (kvm_create_dirty_bitmap(&new) < 0)
1095 goto out_free;
1096 }
1097
1098 slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT);
1099 if (!slots)
1100 goto out_free;
1101 memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots));
1102
1103 if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
1104 slot = id_to_memslot(slots, id);
1105 slot->flags |= KVM_MEMSLOT_INVALID;
1106
1107 old_memslots = install_new_memslots(kvm, as_id, slots);
1108
1109
1110
1111
1112
1113
1114
1115
1116 kvm_arch_flush_shadow_memslot(kvm, slot);
1117
1118
1119
1120
1121
1122
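/*
 * Reuse the array that was just replaced as the scratch copy for the final
 * update below; it differs from the currently installed memslots only by
 * the KVM_MEMSLOT_INVALID flag set above, so no third allocation is needed.
 */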
1123 slots = old_memslots;
1124 }
1125
1126 r = kvm_arch_prepare_memory_region(kvm, &new, mem, change);
1127 if (r)
1128 goto out_slots;
1129
1130
1131 if (change == KVM_MR_DELETE) {
1132 new.dirty_bitmap = NULL;
1133 memset(&new.arch, 0, sizeof(new.arch));
1134 }
1135
1136 update_memslots(slots, &new, change);
1137 old_memslots = install_new_memslots(kvm, as_id, slots);
1138
1139 kvm_arch_commit_memory_region(kvm, mem, &old, &new, change);
1140
1141 kvm_free_memslot(kvm, &old, &new);
1142 kvfree(old_memslots);
1143 return 0;
1144
1145 out_slots:
1146 kvfree(slots);
1147 out_free:
1148 kvm_free_memslot(kvm, &new, &old);
1149 out:
1150 return r;
1151 }
1152 EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
1153
1154 int kvm_set_memory_region(struct kvm *kvm,
1155 const struct kvm_userspace_memory_region *mem)
1156 {
1157 int r;
1158
1159 mutex_lock(&kvm->slots_lock);
1160 r = __kvm_set_memory_region(kvm, mem);
1161 mutex_unlock(&kvm->slots_lock);
1162 return r;
1163 }
1164 EXPORT_SYMBOL_GPL(kvm_set_memory_region);
1165
1166 static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
1167 struct kvm_userspace_memory_region *mem)
1168 {
1169 if ((u16)mem->slot >= KVM_USER_MEM_SLOTS)
1170 return -EINVAL;
1171
1172 return kvm_set_memory_region(kvm, mem);
1173 }
1174
1175 int kvm_get_dirty_log(struct kvm *kvm,
1176 struct kvm_dirty_log *log, int *is_dirty)
1177 {
1178 struct kvm_memslots *slots;
1179 struct kvm_memory_slot *memslot;
1180 int i, as_id, id;
1181 unsigned long n;
1182 unsigned long any = 0;
1183
1184 as_id = log->slot >> 16;
1185 id = (u16)log->slot;
1186 if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
1187 return -EINVAL;
1188
1189 slots = __kvm_memslots(kvm, as_id);
1190 memslot = id_to_memslot(slots, id);
1191 if (!memslot->dirty_bitmap)
1192 return -ENOENT;
1193
1194 n = kvm_dirty_bitmap_bytes(memslot);
1195
1196 for (i = 0; !any && i < n/sizeof(long); ++i)
1197 any = memslot->dirty_bitmap[i];
1198
1199 if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
1200 return -EFAULT;
1201
1202 if (any)
1203 *is_dirty = 1;
1204 return 0;
1205 }
1206 EXPORT_SYMBOL_GPL(kvm_get_dirty_log);
1207
1208 #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
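/*
 * Copy the slot's dirty bitmap to userspace.  If kvm->manual_dirty_log_protect
 * is set, the bitmap is copied as-is and the bits are cleared later through
 * kvm_clear_dirty_log_protect(); otherwise the bits are fetched-and-cleared
 * here and write protection is re-armed via
 * kvm_arch_mmu_enable_log_dirty_pt_masked(), with *flush telling the caller
 * whether the TLBs must be flushed.
 */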
1231 int kvm_get_dirty_log_protect(struct kvm *kvm,
1232 struct kvm_dirty_log *log, bool *flush)
1233 {
1234 struct kvm_memslots *slots;
1235 struct kvm_memory_slot *memslot;
1236 int i, as_id, id;
1237 unsigned long n;
1238 unsigned long *dirty_bitmap;
1239 unsigned long *dirty_bitmap_buffer;
1240
1241 as_id = log->slot >> 16;
1242 id = (u16)log->slot;
1243 if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
1244 return -EINVAL;
1245
1246 slots = __kvm_memslots(kvm, as_id);
1247 memslot = id_to_memslot(slots, id);
1248
1249 dirty_bitmap = memslot->dirty_bitmap;
1250 if (!dirty_bitmap)
1251 return -ENOENT;
1252
1253 n = kvm_dirty_bitmap_bytes(memslot);
1254 *flush = false;
1255 if (kvm->manual_dirty_log_protect) {
1256
1257
1258
1259
1260
1261
1262
1263
1264 dirty_bitmap_buffer = dirty_bitmap;
1265 } else {
1266 dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);
1267 memset(dirty_bitmap_buffer, 0, n);
1268
1269 spin_lock(&kvm->mmu_lock);
1270 for (i = 0; i < n / sizeof(long); i++) {
1271 unsigned long mask;
1272 gfn_t offset;
1273
1274 if (!dirty_bitmap[i])
1275 continue;
1276
1277 *flush = true;
1278 mask = xchg(&dirty_bitmap[i], 0);
1279 dirty_bitmap_buffer[i] = mask;
1280
1281 offset = i * BITS_PER_LONG;
1282 kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
1283 offset, mask);
1284 }
1285 spin_unlock(&kvm->mmu_lock);
1286 }
1287
1288 if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
1289 return -EFAULT;
1290 return 0;
1291 }
1292 EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect);
1293
1294
1295
1296
1297
1298
1299
1300
1301 int kvm_clear_dirty_log_protect(struct kvm *kvm,
1302 struct kvm_clear_dirty_log *log, bool *flush)
1303 {
1304 struct kvm_memslots *slots;
1305 struct kvm_memory_slot *memslot;
1306 int as_id, id;
1307 gfn_t offset;
1308 unsigned long i, n;
1309 unsigned long *dirty_bitmap;
1310 unsigned long *dirty_bitmap_buffer;
1311
1312 as_id = log->slot >> 16;
1313 id = (u16)log->slot;
1314 if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
1315 return -EINVAL;
1316
1317 if (log->first_page & 63)
1318 return -EINVAL;
1319
1320 slots = __kvm_memslots(kvm, as_id);
1321 memslot = id_to_memslot(slots, id);
1322
1323 dirty_bitmap = memslot->dirty_bitmap;
1324 if (!dirty_bitmap)
1325 return -ENOENT;
1326
1327 n = ALIGN(log->num_pages, BITS_PER_LONG) / 8;
1328
1329 if (log->first_page > memslot->npages ||
1330 log->num_pages > memslot->npages - log->first_page ||
1331 (log->num_pages < memslot->npages - log->first_page && (log->num_pages & 63)))
1332 return -EINVAL;
1333
1334 *flush = false;
1335 dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);
1336 if (copy_from_user(dirty_bitmap_buffer, log->dirty_bitmap, n))
1337 return -EFAULT;
1338
1339 spin_lock(&kvm->mmu_lock);
1340 for (offset = log->first_page, i = offset / BITS_PER_LONG,
1341 n = DIV_ROUND_UP(log->num_pages, BITS_PER_LONG); n--;
1342 i++, offset += BITS_PER_LONG) {
1343 unsigned long mask = *dirty_bitmap_buffer++;
1344 atomic_long_t *p = (atomic_long_t *) &dirty_bitmap[i];
1345 if (!mask)
1346 continue;
1347
1348 mask &= atomic_long_fetch_andnot(mask, p);
1349
1350
1351
1352
1353
1354
1355
1356 if (mask) {
1357 *flush = true;
1358 kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
1359 offset, mask);
1360 }
1361 }
1362 spin_unlock(&kvm->mmu_lock);
1363
1364 return 0;
1365 }
1366 EXPORT_SYMBOL_GPL(kvm_clear_dirty_log_protect);
1367 #endif
1368
1369 bool kvm_largepages_enabled(void)
1370 {
1371 return largepages_enabled;
1372 }
1373
1374 void kvm_disable_largepages(void)
1375 {
1376 largepages_enabled = false;
1377 }
1378 EXPORT_SYMBOL_GPL(kvm_disable_largepages);
1379
1380 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
1381 {
1382 return __gfn_to_memslot(kvm_memslots(kvm), gfn);
1383 }
1384 EXPORT_SYMBOL_GPL(gfn_to_memslot);
1385
1386 struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn)
1387 {
1388 return __gfn_to_memslot(kvm_vcpu_memslots(vcpu), gfn);
1389 }
1390
1391 bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
1392 {
1393 struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn);
1394
1395 if (!memslot || memslot->id >= KVM_USER_MEM_SLOTS ||
1396 memslot->flags & KVM_MEMSLOT_INVALID)
1397 return false;
1398
1399 return true;
1400 }
1401 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
1402
1403 unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn)
1404 {
1405 struct vm_area_struct *vma;
1406 unsigned long addr, size;
1407
1408 size = PAGE_SIZE;
1409
1410 addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gfn, NULL);
1411 if (kvm_is_error_hva(addr))
1412 return PAGE_SIZE;
1413
1414 down_read(&current->mm->mmap_sem);
1415 vma = find_vma(current->mm, addr);
1416 if (!vma)
1417 goto out;
1418
1419 size = vma_kernel_pagesize(vma);
1420
1421 out:
1422 up_read(&current->mm->mmap_sem);
1423
1424 return size;
1425 }
1426
1427 static bool memslot_is_readonly(struct kvm_memory_slot *slot)
1428 {
1429 return slot->flags & KVM_MEM_READONLY;
1430 }
1431
1432 static unsigned long __gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
1433 gfn_t *nr_pages, bool write)
1434 {
1435 if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
1436 return KVM_HVA_ERR_BAD;
1437
1438 if (memslot_is_readonly(slot) && write)
1439 return KVM_HVA_ERR_RO_BAD;
1440
1441 if (nr_pages)
1442 *nr_pages = slot->npages - (gfn - slot->base_gfn);
1443
1444 return __gfn_to_hva_memslot(slot, gfn);
1445 }
1446
1447 static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
1448 gfn_t *nr_pages)
1449 {
1450 return __gfn_to_hva_many(slot, gfn, nr_pages, true);
1451 }
1452
1453 unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
1454 gfn_t gfn)
1455 {
1456 return gfn_to_hva_many(slot, gfn, NULL);
1457 }
1458 EXPORT_SYMBOL_GPL(gfn_to_hva_memslot);
1459
1460 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
1461 {
1462 return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL);
1463 }
1464 EXPORT_SYMBOL_GPL(gfn_to_hva);
1465
1466 unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn)
1467 {
1468 return gfn_to_hva_many(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn, NULL);
1469 }
1470 EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_hva);
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480 unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot,
1481 gfn_t gfn, bool *writable)
1482 {
1483 unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false);
1484
1485 if (!kvm_is_error_hva(hva) && writable)
1486 *writable = !memslot_is_readonly(slot);
1487
1488 return hva;
1489 }
1490
1491 unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
1492 {
1493 struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
1494
1495 return gfn_to_hva_memslot_prot(slot, gfn, writable);
1496 }
1497
1498 unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable)
1499 {
1500 struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
1501
1502 return gfn_to_hva_memslot_prot(slot, gfn, writable);
1503 }
1504
1505 static inline int check_user_page_hwpoison(unsigned long addr)
1506 {
1507 int rc, flags = FOLL_HWPOISON | FOLL_WRITE;
1508
1509 rc = get_user_pages(addr, 1, flags, NULL, NULL);
1510 return rc == -EHWPOISON;
1511 }
1512
1513
1514
1515
1516
1517
1518 static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
1519 bool *writable, kvm_pfn_t *pfn)
1520 {
1521 struct page *page[1];
1522 int npages;
1523
1524
1525
1526
1527
1528
1529 if (!(write_fault || writable))
1530 return false;
1531
1532 npages = __get_user_pages_fast(addr, 1, 1, page);
1533 if (npages == 1) {
1534 *pfn = page_to_pfn(page[0]);
1535
1536 if (writable)
1537 *writable = true;
1538 return true;
1539 }
1540
1541 return false;
1542 }
1543
1544
1545
1546
1547
1548 static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
1549 bool *writable, kvm_pfn_t *pfn)
1550 {
1551 unsigned int flags = FOLL_HWPOISON;
1552 struct page *page;
1553 int npages = 0;
1554
1555 might_sleep();
1556
1557 if (writable)
1558 *writable = write_fault;
1559
1560 if (write_fault)
1561 flags |= FOLL_WRITE;
1562 if (async)
1563 flags |= FOLL_NOWAIT;
1564
1565 npages = get_user_pages_unlocked(addr, 1, &page, flags);
1566 if (npages != 1)
1567 return npages;
1568
1569
1570 if (unlikely(!write_fault) && writable) {
1571 struct page *wpage;
1572
1573 if (__get_user_pages_fast(addr, 1, 1, &wpage) == 1) {
1574 *writable = true;
1575 put_page(page);
1576 page = wpage;
1577 }
1578 }
1579 *pfn = page_to_pfn(page);
1580 return npages;
1581 }
1582
1583 static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)
1584 {
1585 if (unlikely(!(vma->vm_flags & VM_READ)))
1586 return false;
1587
1588 if (write_fault && (unlikely(!(vma->vm_flags & VM_WRITE))))
1589 return false;
1590
1591 return true;
1592 }
1593
1594 static int hva_to_pfn_remapped(struct vm_area_struct *vma,
1595 unsigned long addr, bool *async,
1596 bool write_fault, bool *writable,
1597 kvm_pfn_t *p_pfn)
1598 {
1599 unsigned long pfn;
1600 int r;
1601
1602 r = follow_pfn(vma, addr, &pfn);
1603 if (r) {
1604
1605
1606
1607
1608 bool unlocked = false;
1609 r = fixup_user_fault(current, current->mm, addr,
1610 (write_fault ? FAULT_FLAG_WRITE : 0),
1611 &unlocked);
1612 if (unlocked)
1613 return -EAGAIN;
1614 if (r)
1615 return r;
1616
1617 r = follow_pfn(vma, addr, &pfn);
1618 if (r)
1619 return r;
1620
1621 }
1622
1623 if (writable)
1624 *writable = true;
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637 kvm_get_pfn(pfn);
1638
1639 *p_pfn = pfn;
1640 return 0;
1641 }
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
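/*
 * Translate a host virtual address into a pfn, pinning the backing page
 * when there is one.  @atomic makes the function fail with
 * KVM_PFN_ERR_FAULT rather than sleep if the fast GUP path misses;
 * @async, when non-NULL, requests FOLL_NOWAIT in the slow path and is set
 * when the fault could be completed asynchronously; @write_fault asks for
 * a writable mapping and @writable, when non-NULL, reports whether one was
 * obtained.
 */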
1657 static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
1658 bool write_fault, bool *writable)
1659 {
1660 struct vm_area_struct *vma;
1661 kvm_pfn_t pfn = 0;
1662 int npages, r;
1663
1664
1665 BUG_ON(atomic && async);
1666
1667 if (hva_to_pfn_fast(addr, write_fault, writable, &pfn))
1668 return pfn;
1669
1670 if (atomic)
1671 return KVM_PFN_ERR_FAULT;
1672
1673 npages = hva_to_pfn_slow(addr, async, write_fault, writable, &pfn);
1674 if (npages == 1)
1675 return pfn;
1676
1677 down_read(&current->mm->mmap_sem);
1678 if (npages == -EHWPOISON ||
1679 (!async && check_user_page_hwpoison(addr))) {
1680 pfn = KVM_PFN_ERR_HWPOISON;
1681 goto exit;
1682 }
1683
1684 retry:
1685 vma = find_vma_intersection(current->mm, addr, addr + 1);
1686
1687 if (vma == NULL)
1688 pfn = KVM_PFN_ERR_FAULT;
1689 else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) {
1690 r = hva_to_pfn_remapped(vma, addr, async, write_fault, writable, &pfn);
1691 if (r == -EAGAIN)
1692 goto retry;
1693 if (r < 0)
1694 pfn = KVM_PFN_ERR_FAULT;
1695 } else {
1696 if (async && vma_is_valid(vma, write_fault))
1697 *async = true;
1698 pfn = KVM_PFN_ERR_FAULT;
1699 }
1700 exit:
1701 up_read(&current->mm->mmap_sem);
1702 return pfn;
1703 }
1704
1705 kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn,
1706 bool atomic, bool *async, bool write_fault,
1707 bool *writable)
1708 {
1709 unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault);
1710
1711 if (addr == KVM_HVA_ERR_RO_BAD) {
1712 if (writable)
1713 *writable = false;
1714 return KVM_PFN_ERR_RO_FAULT;
1715 }
1716
1717 if (kvm_is_error_hva(addr)) {
1718 if (writable)
1719 *writable = false;
1720 return KVM_PFN_NOSLOT;
1721 }
1722
1723
1724 if (writable && memslot_is_readonly(slot)) {
1725 *writable = false;
1726 writable = NULL;
1727 }
1728
1729 return hva_to_pfn(addr, atomic, async, write_fault,
1730 writable);
1731 }
1732 EXPORT_SYMBOL_GPL(__gfn_to_pfn_memslot);
1733
1734 kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
1735 bool *writable)
1736 {
1737 return __gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn, false, NULL,
1738 write_fault, writable);
1739 }
1740 EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
1741
1742 kvm_pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
1743 {
1744 return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL);
1745 }
1746 EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot);
1747
1748 kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn)
1749 {
1750 return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL);
1751 }
1752 EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic);
1753
1754 kvm_pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
1755 {
1756 return gfn_to_pfn_memslot_atomic(gfn_to_memslot(kvm, gfn), gfn);
1757 }
1758 EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);
1759
1760 kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn)
1761 {
1762 return gfn_to_pfn_memslot_atomic(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn);
1763 }
1764 EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_pfn_atomic);
1765
1766 kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
1767 {
1768 return gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn);
1769 }
1770 EXPORT_SYMBOL_GPL(gfn_to_pfn);
1771
1772 kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
1773 {
1774 return gfn_to_pfn_memslot(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn);
1775 }
1776 EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_pfn);
1777
1778 int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
1779 struct page **pages, int nr_pages)
1780 {
1781 unsigned long addr;
1782 gfn_t entry = 0;
1783
1784 addr = gfn_to_hva_many(slot, gfn, &entry);
1785 if (kvm_is_error_hva(addr))
1786 return -1;
1787
1788 if (entry < nr_pages)
1789 return 0;
1790
1791 return __get_user_pages_fast(addr, nr_pages, 1, pages);
1792 }
1793 EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic);
1794
1795 static struct page *kvm_pfn_to_page(kvm_pfn_t pfn)
1796 {
1797 if (is_error_noslot_pfn(pfn))
1798 return KVM_ERR_PTR_BAD_PAGE;
1799
1800 if (kvm_is_reserved_pfn(pfn)) {
1801 WARN_ON(1);
1802 return KVM_ERR_PTR_BAD_PAGE;
1803 }
1804
1805 return pfn_to_page(pfn);
1806 }
1807
1808 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
1809 {
1810 kvm_pfn_t pfn;
1811
1812 pfn = gfn_to_pfn(kvm, gfn);
1813
1814 return kvm_pfn_to_page(pfn);
1815 }
1816 EXPORT_SYMBOL_GPL(gfn_to_page);
1817
1818 void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache)
1819 {
1820 if (pfn == 0)
1821 return;
1822
1823 if (cache)
1824 cache->pfn = cache->gfn = 0;
1825
1826 if (dirty)
1827 kvm_release_pfn_dirty(pfn);
1828 else
1829 kvm_release_pfn_clean(pfn);
1830 }
1831
1832 static void kvm_cache_gfn_to_pfn(struct kvm_memory_slot *slot, gfn_t gfn,
1833 struct gfn_to_pfn_cache *cache, u64 gen)
1834 {
1835 kvm_release_pfn(cache->pfn, cache->dirty, cache);
1836
1837 cache->pfn = gfn_to_pfn_memslot(slot, gfn);
1838 cache->gfn = gfn;
1839 cache->dirty = false;
1840 cache->generation = gen;
1841 }
1842
1843 static int __kvm_map_gfn(struct kvm_memslots *slots, gfn_t gfn,
1844 struct kvm_host_map *map,
1845 struct gfn_to_pfn_cache *cache,
1846 bool atomic)
1847 {
1848 kvm_pfn_t pfn;
1849 void *hva = NULL;
1850 struct page *page = KVM_UNMAPPED_PAGE;
1851 struct kvm_memory_slot *slot = __gfn_to_memslot(slots, gfn);
1852 u64 gen = slots->generation;
1853
1854 if (!map)
1855 return -EINVAL;
1856
1857 if (cache) {
1858 if (!cache->pfn || cache->gfn != gfn ||
1859 cache->generation != gen) {
1860 if (atomic)
1861 return -EAGAIN;
1862 kvm_cache_gfn_to_pfn(slot, gfn, cache, gen);
1863 }
1864 pfn = cache->pfn;
1865 } else {
1866 if (atomic)
1867 return -EAGAIN;
1868 pfn = gfn_to_pfn_memslot(slot, gfn);
1869 }
1870 if (is_error_noslot_pfn(pfn))
1871 return -EINVAL;
1872
1873 if (pfn_valid(pfn)) {
1874 page = pfn_to_page(pfn);
1875 if (atomic)
1876 hva = kmap_atomic(page);
1877 else
1878 hva = kmap(page);
1879 #ifdef CONFIG_HAS_IOMEM
1880 } else if (!atomic) {
1881 hva = memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB);
1882 } else {
1883 return -EINVAL;
1884 #endif
1885 }
1886
1887 if (!hva)
1888 return -EFAULT;
1889
1890 map->page = page;
1891 map->hva = hva;
1892 map->pfn = pfn;
1893 map->gfn = gfn;
1894
1895 return 0;
1896 }
1897
1898 int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map,
1899 struct gfn_to_pfn_cache *cache, bool atomic)
1900 {
1901 return __kvm_map_gfn(kvm_memslots(vcpu->kvm), gfn, map,
1902 cache, atomic);
1903 }
1904 EXPORT_SYMBOL_GPL(kvm_map_gfn);
1905
1906 int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
1907 {
1908 return __kvm_map_gfn(kvm_vcpu_memslots(vcpu), gfn, map,
1909 NULL, false);
1910 }
1911 EXPORT_SYMBOL_GPL(kvm_vcpu_map);
1912
1913 static void __kvm_unmap_gfn(struct kvm_memory_slot *memslot,
1914 struct kvm_host_map *map,
1915 struct gfn_to_pfn_cache *cache,
1916 bool dirty, bool atomic)
1917 {
1918 if (!map)
1919 return;
1920
1921 if (!map->hva)
1922 return;
1923
1924 if (map->page != KVM_UNMAPPED_PAGE) {
1925 if (atomic)
1926 kunmap_atomic(map->hva);
1927 else
1928 kunmap(map->page);
1929 }
1930 #ifdef CONFIG_HAS_IOMEM
1931 else if (!atomic)
1932 memunmap(map->hva);
1933 else
1934 WARN_ONCE(1, "Unexpected unmapping in atomic context");
1935 #endif
1936
1937 if (dirty)
1938 mark_page_dirty_in_slot(memslot, map->gfn);
1939
1940 if (cache)
1941 cache->dirty |= dirty;
1942 else
1943 kvm_release_pfn(map->pfn, dirty, NULL);
1944
1945 map->hva = NULL;
1946 map->page = NULL;
1947 }
1948
1949 int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
1950 struct gfn_to_pfn_cache *cache, bool dirty, bool atomic)
1951 {
1952 __kvm_unmap_gfn(gfn_to_memslot(vcpu->kvm, map->gfn), map,
1953 cache, dirty, atomic);
1954 return 0;
1955 }
1956 EXPORT_SYMBOL_GPL(kvm_unmap_gfn);
1957
1958 void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty)
1959 {
1960 __kvm_unmap_gfn(kvm_vcpu_gfn_to_memslot(vcpu, map->gfn), map, NULL,
1961 dirty, false);
1962 }
1963 EXPORT_SYMBOL_GPL(kvm_vcpu_unmap);
1964
1965 struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn)
1966 {
1967 kvm_pfn_t pfn;
1968
1969 pfn = kvm_vcpu_gfn_to_pfn(vcpu, gfn);
1970
1971 return kvm_pfn_to_page(pfn);
1972 }
1973 EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_page);
1974
1975 void kvm_release_page_clean(struct page *page)
1976 {
1977 WARN_ON(is_error_page(page));
1978
1979 kvm_release_pfn_clean(page_to_pfn(page));
1980 }
1981 EXPORT_SYMBOL_GPL(kvm_release_page_clean);
1982
1983 void kvm_release_pfn_clean(kvm_pfn_t pfn)
1984 {
1985 if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn))
1986 put_page(pfn_to_page(pfn));
1987 }
1988 EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
1989
1990 void kvm_release_page_dirty(struct page *page)
1991 {
1992 WARN_ON(is_error_page(page));
1993
1994 kvm_release_pfn_dirty(page_to_pfn(page));
1995 }
1996 EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
1997
1998 void kvm_release_pfn_dirty(kvm_pfn_t pfn)
1999 {
2000 kvm_set_pfn_dirty(pfn);
2001 kvm_release_pfn_clean(pfn);
2002 }
2003 EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
2004
2005 void kvm_set_pfn_dirty(kvm_pfn_t pfn)
2006 {
2007 if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) {
2008 struct page *page = pfn_to_page(pfn);
2009
2010 SetPageDirty(page);
2011 }
2012 }
2013 EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
2014
2015 void kvm_set_pfn_accessed(kvm_pfn_t pfn)
2016 {
2017 if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn))
2018 mark_page_accessed(pfn_to_page(pfn));
2019 }
2020 EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
2021
2022 void kvm_get_pfn(kvm_pfn_t pfn)
2023 {
2024 if (!kvm_is_reserved_pfn(pfn))
2025 get_page(pfn_to_page(pfn));
2026 }
2027 EXPORT_SYMBOL_GPL(kvm_get_pfn);
2028
2029 static int next_segment(unsigned long len, int offset)
2030 {
2031 if (len > PAGE_SIZE - offset)
2032 return PAGE_SIZE - offset;
2033 else
2034 return len;
2035 }
2036
2037 static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn,
2038 void *data, int offset, int len)
2039 {
2040 int r;
2041 unsigned long addr;
2042
2043 addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
2044 if (kvm_is_error_hva(addr))
2045 return -EFAULT;
2046 r = __copy_from_user(data, (void __user *)addr + offset, len);
2047 if (r)
2048 return -EFAULT;
2049 return 0;
2050 }
2051
2052 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
2053 int len)
2054 {
2055 struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
2056
2057 return __kvm_read_guest_page(slot, gfn, data, offset, len);
2058 }
2059 EXPORT_SYMBOL_GPL(kvm_read_guest_page);
2060
2061 int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data,
2062 int offset, int len)
2063 {
2064 struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
2065
2066 return __kvm_read_guest_page(slot, gfn, data, offset, len);
2067 }
2068 EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_page);
2069
2070 int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len)
2071 {
2072 gfn_t gfn = gpa >> PAGE_SHIFT;
2073 int seg;
2074 int offset = offset_in_page(gpa);
2075 int ret;
2076
2077 while ((seg = next_segment(len, offset)) != 0) {
2078 ret = kvm_read_guest_page(kvm, gfn, data, offset, seg);
2079 if (ret < 0)
2080 return ret;
2081 offset = 0;
2082 len -= seg;
2083 data += seg;
2084 ++gfn;
2085 }
2086 return 0;
2087 }
2088 EXPORT_SYMBOL_GPL(kvm_read_guest);
2089
2090 int kvm_vcpu_read_guest(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, unsigned long len)
2091 {
2092 gfn_t gfn = gpa >> PAGE_SHIFT;
2093 int seg;
2094 int offset = offset_in_page(gpa);
2095 int ret;
2096
2097 while ((seg = next_segment(len, offset)) != 0) {
2098 ret = kvm_vcpu_read_guest_page(vcpu, gfn, data, offset, seg);
2099 if (ret < 0)
2100 return ret;
2101 offset = 0;
2102 len -= seg;
2103 data += seg;
2104 ++gfn;
2105 }
2106 return 0;
2107 }
2108 EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest);
2109
2110 static int __kvm_read_guest_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
2111 void *data, int offset, unsigned long len)
2112 {
2113 int r;
2114 unsigned long addr;
2115
2116 addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
2117 if (kvm_is_error_hva(addr))
2118 return -EFAULT;
2119 pagefault_disable();
2120 r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len);
2121 pagefault_enable();
2122 if (r)
2123 return -EFAULT;
2124 return 0;
2125 }
2126
2127 int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
2128 unsigned long len)
2129 {
2130 gfn_t gfn = gpa >> PAGE_SHIFT;
2131 struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
2132 int offset = offset_in_page(gpa);
2133
2134 return __kvm_read_guest_atomic(slot, gfn, data, offset, len);
2135 }
2136 EXPORT_SYMBOL_GPL(kvm_read_guest_atomic);
2137
2138 int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa,
2139 void *data, unsigned long len)
2140 {
2141 gfn_t gfn = gpa >> PAGE_SHIFT;
2142 struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
2143 int offset = offset_in_page(gpa);
2144
2145 return __kvm_read_guest_atomic(slot, gfn, data, offset, len);
2146 }
2147 EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_atomic);
2148
2149 static int __kvm_write_guest_page(struct kvm_memory_slot *memslot, gfn_t gfn,
2150 const void *data, int offset, int len)
2151 {
2152 int r;
2153 unsigned long addr;
2154
2155 addr = gfn_to_hva_memslot(memslot, gfn);
2156 if (kvm_is_error_hva(addr))
2157 return -EFAULT;
2158 r = __copy_to_user((void __user *)addr + offset, data, len);
2159 if (r)
2160 return -EFAULT;
2161 mark_page_dirty_in_slot(memslot, gfn);
2162 return 0;
2163 }
2164
2165 int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn,
2166 const void *data, int offset, int len)
2167 {
2168 struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
2169
2170 return __kvm_write_guest_page(slot, gfn, data, offset, len);
2171 }
2172 EXPORT_SYMBOL_GPL(kvm_write_guest_page);
2173
2174 int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
2175 const void *data, int offset, int len)
2176 {
2177 struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
2178
2179 return __kvm_write_guest_page(slot, gfn, data, offset, len);
2180 }
2181 EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_page);
2182
2183 int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
2184 unsigned long len)
2185 {
2186 gfn_t gfn = gpa >> PAGE_SHIFT;
2187 int seg;
2188 int offset = offset_in_page(gpa);
2189 int ret;
2190
2191 while ((seg = next_segment(len, offset)) != 0) {
2192 ret = kvm_write_guest_page(kvm, gfn, data, offset, seg);
2193 if (ret < 0)
2194 return ret;
2195 offset = 0;
2196 len -= seg;
2197 data += seg;
2198 ++gfn;
2199 }
2200 return 0;
2201 }
2202 EXPORT_SYMBOL_GPL(kvm_write_guest);
2203
2204 int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
2205 unsigned long len)
2206 {
2207 gfn_t gfn = gpa >> PAGE_SHIFT;
2208 int seg;
2209 int offset = offset_in_page(gpa);
2210 int ret;
2211
2212 while ((seg = next_segment(len, offset)) != 0) {
2213 ret = kvm_vcpu_write_guest_page(vcpu, gfn, data, offset, seg);
2214 if (ret < 0)
2215 return ret;
2216 offset = 0;
2217 len -= seg;
2218 data += seg;
2219 ++gfn;
2220 }
2221 return 0;
2222 }
2223 EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest);
2224
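/*
 * Illustration (not part of this file): kvm_read_guest()/kvm_write_guest()
 * walk a guest-physical range one page at a time.  next_segment() (defined
 * earlier in this file) yields min(PAGE_SIZE - offset, len), so only the
 * first chunk may start mid-page; every later chunk starts at offset 0 of
 * the next gfn.  A minimal stand-alone sketch of that arithmetic, assuming
 * a 4 KiB page size purely for illustration:
 */
static void demo_split_gpa_range(unsigned long gpa, unsigned long len)
{
	const unsigned long page_size = 4096;		/* assumption for the sketch */
	unsigned long gfn = gpa / page_size;
	unsigned long offset = gpa % page_size;

	while (len) {
		unsigned long seg = page_size - offset;	/* what next_segment() returns */

		if (seg > len)
			seg = len;
		/* one kvm_{read,write}_guest_page(..., gfn, offset, seg) call goes here */
		offset = 0;
		len -= seg;
		++gfn;
	}
}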
2225 static int __kvm_gfn_to_hva_cache_init(struct kvm_memslots *slots,
2226 struct gfn_to_hva_cache *ghc,
2227 gpa_t gpa, unsigned long len)
2228 {
2229 int offset = offset_in_page(gpa);
2230 gfn_t start_gfn = gpa >> PAGE_SHIFT;
2231 gfn_t end_gfn = (gpa + len - 1) >> PAGE_SHIFT;
2232 gfn_t nr_pages_needed = end_gfn - start_gfn + 1;
2233 gfn_t nr_pages_avail;
2234 int r = start_gfn <= end_gfn ? 0 : -EINVAL;
2235
2236 ghc->gpa = gpa;
2237 ghc->generation = slots->generation;
2238 ghc->len = len;
2239 ghc->hva = KVM_HVA_ERR_BAD;
2240
2241
2242
2243
2244
2245 while (!r && start_gfn <= end_gfn) {
2246 ghc->memslot = __gfn_to_memslot(slots, start_gfn);
2247 ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn,
2248 &nr_pages_avail);
2249 if (kvm_is_error_hva(ghc->hva))
2250 r = -EFAULT;
2251 start_gfn += nr_pages_avail;
2252 }
2253
2254
2255 if (!r && nr_pages_needed == 1)
2256 ghc->hva += offset;
2257 else
2258 ghc->memslot = NULL;
2259
2260 return r;
2261 }
2262
2263 int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
2264 gpa_t gpa, unsigned long len)
2265 {
2266 struct kvm_memslots *slots = kvm_memslots(kvm);
2267 return __kvm_gfn_to_hva_cache_init(slots, ghc, gpa, len);
2268 }
2269 EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init);
2270
2271 int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
2272 void *data, unsigned int offset,
2273 unsigned long len)
2274 {
2275 struct kvm_memslots *slots = kvm_memslots(kvm);
2276 int r;
2277 gpa_t gpa = ghc->gpa + offset;
2278
2279 BUG_ON(len + offset > ghc->len);
2280
2281 if (slots->generation != ghc->generation)
2282 __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len);
2283
2284 if (kvm_is_error_hva(ghc->hva))
2285 return -EFAULT;
2286
2287 if (unlikely(!ghc->memslot))
2288 return kvm_write_guest(kvm, gpa, data, len);
2289
2290 r = __copy_to_user((void __user *)ghc->hva + offset, data, len);
2291 if (r)
2292 return -EFAULT;
2293 mark_page_dirty_in_slot(ghc->memslot, gpa >> PAGE_SHIFT);
2294
2295 return 0;
2296 }
2297 EXPORT_SYMBOL_GPL(kvm_write_guest_offset_cached);
2298
2299 int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
2300 void *data, unsigned long len)
2301 {
2302 return kvm_write_guest_offset_cached(kvm, ghc, data, 0, len);
2303 }
2304 EXPORT_SYMBOL_GPL(kvm_write_guest_cached);
2305
2306 int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
2307 void *data, unsigned long len)
2308 {
2309 struct kvm_memslots *slots = kvm_memslots(kvm);
2310 int r;
2311
2312 BUG_ON(len > ghc->len);
2313
2314 if (slots->generation != ghc->generation)
2315 __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len);
2316
2317 if (kvm_is_error_hva(ghc->hva))
2318 return -EFAULT;
2319
2320 if (unlikely(!ghc->memslot))
2321 return kvm_read_guest(kvm, ghc->gpa, data, len);
2322
2323 r = __copy_from_user(data, (void __user *)ghc->hva, len);
2324 if (r)
2325 return -EFAULT;
2326
2327 return 0;
2328 }
2329 EXPORT_SYMBOL_GPL(kvm_read_guest_cached);
2330
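/*
 * Illustration (not part of this file): a hedged sketch of how a kernel-side
 * caller typically uses the gfn_to_hva_cache helpers above.  The cache pins
 * the gpa->hva translation together with the memslot generation it was
 * computed under; kvm_write_guest_cached() revalidates against
 * slots->generation and falls back to the uncached kvm_write_guest() path
 * when the range straddles memslots.  struct demo_shared_page and the demo_*
 * names are assumptions for the sketch only.
 */
struct demo_shared_page {			/* hypothetical guest-shared layout */
	u64 counter;
};

/* Set up once, e.g. when the guest registers the page. */
static int demo_cache_setup(struct kvm *kvm, struct gfn_to_hva_cache *ghc, gpa_t gpa)
{
	return kvm_gfn_to_hva_cache_init(kvm, ghc, gpa,
					 sizeof(struct demo_shared_page));
}

/* Hot path: reuses the cached hva until the memslot layout changes. */
static int demo_cache_publish(struct kvm *kvm, struct gfn_to_hva_cache *ghc, u64 value)
{
	struct demo_shared_page page = { .counter = value };

	return kvm_write_guest_cached(kvm, ghc, &page, sizeof(page));
}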
2331 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
2332 {
2333 const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
2334
2335 return kvm_write_guest_page(kvm, gfn, zero_page, offset, len);
2336 }
2337 EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
2338
2339 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
2340 {
2341 gfn_t gfn = gpa >> PAGE_SHIFT;
2342 int seg;
2343 int offset = offset_in_page(gpa);
2344 int ret;
2345
2346 while ((seg = next_segment(len, offset)) != 0) {
2347 ret = kvm_clear_guest_page(kvm, gfn, offset, seg);
2348 if (ret < 0)
2349 return ret;
2350 offset = 0;
2351 len -= seg;
2352 ++gfn;
2353 }
2354 return 0;
2355 }
2356 EXPORT_SYMBOL_GPL(kvm_clear_guest);
2357
2358 static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot,
2359 gfn_t gfn)
2360 {
2361 if (memslot && memslot->dirty_bitmap) {
2362 unsigned long rel_gfn = gfn - memslot->base_gfn;
2363
2364 set_bit_le(rel_gfn, memslot->dirty_bitmap);
2365 }
2366 }
2367
2368 void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
2369 {
2370 struct kvm_memory_slot *memslot;
2371
2372 memslot = gfn_to_memslot(kvm, gfn);
2373 mark_page_dirty_in_slot(memslot, gfn);
2374 }
2375 EXPORT_SYMBOL_GPL(mark_page_dirty);
2376
2377 void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn)
2378 {
2379 struct kvm_memory_slot *memslot;
2380
2381 memslot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
2382 mark_page_dirty_in_slot(memslot, gfn);
2383 }
2384 EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
2385
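/*
 * Illustration (not part of this file): the bit set by
 * mark_page_dirty_in_slot() is relative to the memslot's base_gfn, so
 * userspace sizes its buffer from the slot's memory_size.  A hedged
 * userspace sketch of fetching that bitmap with KVM_GET_DIRTY_LOG; vm_fd,
 * the slot id and slot_size are assumptions, error handling is trimmed.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdlib.h>

static void *demo_fetch_dirty_bitmap(int vm_fd, unsigned int slot,
				     unsigned long slot_size, long page_size)
{
	unsigned long npages = slot_size / page_size;
	unsigned long bytes = ((npages + 63) / 64) * 8;	/* one bit per page, u64-aligned */
	struct kvm_dirty_log log = { .slot = slot };

	log.dirty_bitmap = calloc(1, bytes);
	if (!log.dirty_bitmap)
		return NULL;
	if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0) {
		free(log.dirty_bitmap);
		return NULL;
	}
	return log.dirty_bitmap;	/* little-endian bit N set == page N dirty */
}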
2386 void kvm_sigset_activate(struct kvm_vcpu *vcpu)
2387 {
2388 if (!vcpu->sigset_active)
2389 return;
2390
2391
2392
2393
2394
2395
2396
2397 sigprocmask(SIG_SETMASK, &vcpu->sigset, &current->real_blocked);
2398 }
2399
2400 void kvm_sigset_deactivate(struct kvm_vcpu *vcpu)
2401 {
2402 if (!vcpu->sigset_active)
2403 return;
2404
2405 sigprocmask(SIG_SETMASK, &current->real_blocked, NULL);
2406 sigemptyset(&current->real_blocked);
2407 }
2408
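/*
 * Illustration (not part of this file): the set installed by
 * kvm_sigset_activate() becomes the blocked mask while KVM_RUN executes, so
 * a signal left out of it can interrupt the run with -EINTR.  A hedged
 * userspace sketch of programming that mask with KVM_SET_SIGNAL_MASK; the
 * 8-byte kernel sigset size is an assumption about the x86-64 ABI (glibc's
 * sigset_t is larger), and the thread is assumed to keep SIGUSR1 blocked
 * outside KVM_RUN via pthread_sigmask() elsewhere.
 */
#include <linux/kvm.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

static int demo_set_vcpu_sigmask(int vcpu_fd)
{
	const size_t kernel_sigset_size = 8;	/* ABI assumption, see above */
	struct kvm_signal_mask *mask;
	sigset_t run_blocked;
	int r;

	sigfillset(&run_blocked);
	sigdelset(&run_blocked, SIGUSR1);	/* deliverable only while the vCPU runs */

	mask = calloc(1, sizeof(*mask) + kernel_sigset_size);
	if (!mask)
		return -1;
	mask->len = kernel_sigset_size;
	memcpy(mask->sigset, &run_blocked, kernel_sigset_size);
	r = ioctl(vcpu_fd, KVM_SET_SIGNAL_MASK, mask);
	free(mask);
	return r;
}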
2409 static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
2410 {
2411 unsigned int old, val, grow, grow_start;
2412
2413 old = val = vcpu->halt_poll_ns;
2414 grow_start = READ_ONCE(halt_poll_ns_grow_start);
2415 grow = READ_ONCE(halt_poll_ns_grow);
2416 if (!grow)
2417 goto out;
2418
2419 val *= grow;
2420 if (val < grow_start)
2421 val = grow_start;
2422
2423 if (val > halt_poll_ns)
2424 val = halt_poll_ns;
2425
2426 vcpu->halt_poll_ns = val;
2427 out:
2428 trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old);
2429 }
2430
2431 static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
2432 {
2433 unsigned int old, val, shrink;
2434
2435 old = val = vcpu->halt_poll_ns;
2436 shrink = READ_ONCE(halt_poll_ns_shrink);
2437 if (shrink == 0)
2438 val = 0;
2439 else
2440 val /= shrink;
2441
2442 vcpu->halt_poll_ns = val;
2443 trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old);
2444 }
2445
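/*
 * Illustration (not part of this file): the adaptive halt-poll window above
 * grows multiplicatively and shrinks by division.  A stand-alone sketch of
 * the same arithmetic; the parameters are caller-supplied examples, not the
 * module defaults.
 */
static unsigned int demo_grow_poll(unsigned int cur, unsigned int grow,
				   unsigned int grow_start, unsigned int ceiling)
{
	unsigned int val;

	if (!grow)
		return cur;		/* growing disabled, keep the old window */
	val = cur * grow;
	if (val < grow_start)
		val = grow_start;	/* jump straight to a useful window */
	if (val > ceiling)
		val = ceiling;		/* never poll longer than halt_poll_ns */
	return val;
}

static unsigned int demo_shrink_poll(unsigned int cur, unsigned int shrink)
{
	return shrink ? cur / shrink : 0;	/* shrink == 0 turns polling off */
}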
2446 static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
2447 {
2448 int ret = -EINTR;
2449 int idx = srcu_read_lock(&vcpu->kvm->srcu);
2450
2451 if (kvm_arch_vcpu_runnable(vcpu)) {
2452 kvm_make_request(KVM_REQ_UNHALT, vcpu);
2453 goto out;
2454 }
2455 if (kvm_cpu_has_pending_timer(vcpu))
2456 goto out;
2457 if (signal_pending(current))
2458 goto out;
2459
2460 ret = 0;
2461 out:
2462 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2463 return ret;
2464 }
2465
2466
2467
2468
2469 void kvm_vcpu_block(struct kvm_vcpu *vcpu)
2470 {
2471 ktime_t start, cur;
2472 DECLARE_SWAITQUEUE(wait);
2473 bool waited = false;
2474 u64 block_ns;
2475
2476 kvm_arch_vcpu_blocking(vcpu);
2477
2478 start = cur = ktime_get();
2479 if (vcpu->halt_poll_ns && !kvm_arch_no_poll(vcpu)) {
2480 ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
2481
2482 ++vcpu->stat.halt_attempted_poll;
2483 do {
2484
2485
2486
2487
2488 if (kvm_vcpu_check_block(vcpu) < 0) {
2489 ++vcpu->stat.halt_successful_poll;
2490 if (!vcpu_valid_wakeup(vcpu))
2491 ++vcpu->stat.halt_poll_invalid;
2492 goto out;
2493 }
2494 cur = ktime_get();
2495 } while (single_task_running() && ktime_before(cur, stop));
2496 }
2497
2498 for (;;) {
2499 prepare_to_swait_exclusive(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
2500
2501 if (kvm_vcpu_check_block(vcpu) < 0)
2502 break;
2503
2504 waited = true;
2505 schedule();
2506 }
2507
2508 finish_swait(&vcpu->wq, &wait);
2509 cur = ktime_get();
2510 out:
2511 kvm_arch_vcpu_unblocking(vcpu);
2512 block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
2513
2514 if (!kvm_arch_no_poll(vcpu)) {
2515 if (!vcpu_valid_wakeup(vcpu)) {
2516 shrink_halt_poll_ns(vcpu);
2517 } else if (halt_poll_ns) {
2518 if (block_ns <= vcpu->halt_poll_ns)
2519 ;
2520
2521 else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
2522 shrink_halt_poll_ns(vcpu);
2523
2524 else if (vcpu->halt_poll_ns < halt_poll_ns &&
2525 block_ns < halt_poll_ns)
2526 grow_halt_poll_ns(vcpu);
2527 } else {
2528 vcpu->halt_poll_ns = 0;
2529 }
2530 }
2531
2532 trace_kvm_vcpu_wakeup(block_ns, waited, vcpu_valid_wakeup(vcpu));
2533 kvm_arch_vcpu_block_finish(vcpu);
2534 }
2535 EXPORT_SYMBOL_GPL(kvm_vcpu_block);
2536
2537 bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
2538 {
2539 struct swait_queue_head *wqp;
2540
2541 wqp = kvm_arch_vcpu_wq(vcpu);
2542 if (swq_has_sleeper(wqp)) {
2543 swake_up_one(wqp);
2544 WRITE_ONCE(vcpu->ready, true);
2545 ++vcpu->stat.halt_wakeup;
2546 return true;
2547 }
2548
2549 return false;
2550 }
2551 EXPORT_SYMBOL_GPL(kvm_vcpu_wake_up);
2552
2553 #ifndef CONFIG_S390
2554
2555
2556
2557 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
2558 {
2559 int me;
2560 int cpu = vcpu->cpu;
2561
2562 if (kvm_vcpu_wake_up(vcpu))
2563 return;
2564
2565 me = get_cpu();
2566 if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
2567 if (kvm_arch_vcpu_should_kick(vcpu))
2568 smp_send_reschedule(cpu);
2569 put_cpu();
2570 }
2571 EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
2572 #endif
2573
2574 int kvm_vcpu_yield_to(struct kvm_vcpu *target)
2575 {
2576 struct pid *pid;
2577 struct task_struct *task = NULL;
2578 int ret = 0;
2579
2580 rcu_read_lock();
2581 pid = rcu_dereference(target->pid);
2582 if (pid)
2583 task = get_pid_task(pid, PIDTYPE_PID);
2584 rcu_read_unlock();
2585 if (!task)
2586 return ret;
2587 ret = yield_to(task, 1);
2588 put_task_struct(task);
2589
2590 return ret;
2591 }
2592 EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616 static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
2617 {
2618 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
2619 bool eligible;
2620
2621 eligible = !vcpu->spin_loop.in_spin_loop ||
2622 vcpu->spin_loop.dy_eligible;
2623
2624 if (vcpu->spin_loop.in_spin_loop)
2625 kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
2626
2627 return eligible;
2628 #else
2629 return true;
2630 #endif
2631 }
2632
2633
2634
2635
2636
2637
2638 bool __weak kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
2639 {
2640 return kvm_arch_vcpu_runnable(vcpu);
2641 }
2642
2643 static bool vcpu_dy_runnable(struct kvm_vcpu *vcpu)
2644 {
2645 if (kvm_arch_dy_runnable(vcpu))
2646 return true;
2647
2648 #ifdef CONFIG_KVM_ASYNC_PF
2649 if (!list_empty_careful(&vcpu->async_pf.done))
2650 return true;
2651 #endif
2652
2653 return false;
2654 }
2655
2656 void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
2657 {
2658 struct kvm *kvm = me->kvm;
2659 struct kvm_vcpu *vcpu;
2660 int last_boosted_vcpu = me->kvm->last_boosted_vcpu;
2661 int yielded = 0;
2662 int try = 3;
2663 int pass;
2664 int i;
2665
2666 kvm_vcpu_set_in_spin_loop(me, true);
2667
2668
2669
2670
2671
2672
2673
2674 for (pass = 0; pass < 2 && !yielded && try; pass++) {
2675 kvm_for_each_vcpu(i, vcpu, kvm) {
2676 if (!pass && i <= last_boosted_vcpu) {
2677 i = last_boosted_vcpu;
2678 continue;
2679 } else if (pass && i > last_boosted_vcpu)
2680 break;
2681 if (!READ_ONCE(vcpu->ready))
2682 continue;
2683 if (vcpu == me)
2684 continue;
2685 if (swait_active(&vcpu->wq) && !vcpu_dy_runnable(vcpu))
2686 continue;
2687 if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
2688 !kvm_arch_vcpu_in_kernel(vcpu))
2689 continue;
2690 if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
2691 continue;
2692
2693 yielded = kvm_vcpu_yield_to(vcpu);
2694 if (yielded > 0) {
2695 kvm->last_boosted_vcpu = i;
2696 break;
2697 } else if (yielded < 0) {
2698 try--;
2699 if (!try)
2700 break;
2701 }
2702 }
2703 }
2704 kvm_vcpu_set_in_spin_loop(me, false);
2705
2706
2707 kvm_vcpu_set_dy_eligible(me, false);
2708 }
2709 EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
2710
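/*
 * Illustration (not part of this file): the two-pass loop in
 * kvm_vcpu_on_spin() starts just after last_boosted_vcpu and wraps around,
 * so directed yield rotates through candidates instead of always boosting
 * vCPU 0.  A minimal sketch of that visiting order over n vCPU indexes:
 */
static void demo_boost_scan_order(int n, int last_boosted)
{
	int pass, i;

	for (pass = 0; pass < 2; pass++) {
		for (i = 0; i < n; i++) {
			if (!pass && i <= last_boosted) {
				i = last_boosted;	/* pass 0: skip ahead */
				continue;
			} else if (pass && i > last_boosted) {
				break;			/* pass 1: already covered */
			}
			/* candidate i is considered here:
			 * visits last_boosted+1 .. n-1, then 0 .. last_boosted */
		}
	}
}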
2711 static vm_fault_t kvm_vcpu_fault(struct vm_fault *vmf)
2712 {
2713 struct kvm_vcpu *vcpu = vmf->vma->vm_file->private_data;
2714 struct page *page;
2715
2716 if (vmf->pgoff == 0)
2717 page = virt_to_page(vcpu->run);
2718 #ifdef CONFIG_X86
2719 else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
2720 page = virt_to_page(vcpu->arch.pio_data);
2721 #endif
2722 #ifdef CONFIG_KVM_MMIO
2723 else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET)
2724 page = virt_to_page(vcpu->kvm->coalesced_mmio_ring);
2725 #endif
2726 else
2727 return kvm_arch_vcpu_fault(vcpu, vmf);
2728 get_page(page);
2729 vmf->page = page;
2730 return 0;
2731 }
2732
2733 static const struct vm_operations_struct kvm_vcpu_vm_ops = {
2734 .fault = kvm_vcpu_fault,
2735 };
2736
2737 static int kvm_vcpu_mmap(struct file *file, struct vm_area_struct *vma)
2738 {
2739 vma->vm_ops = &kvm_vcpu_vm_ops;
2740 return 0;
2741 }
2742
2743 static int kvm_vcpu_release(struct inode *inode, struct file *filp)
2744 {
2745 struct kvm_vcpu *vcpu = filp->private_data;
2746
2747 debugfs_remove_recursive(vcpu->debugfs_dentry);
2748 kvm_put_kvm(vcpu->kvm);
2749 return 0;
2750 }
2751
2752 static struct file_operations kvm_vcpu_fops = {
2753 .release = kvm_vcpu_release,
2754 .unlocked_ioctl = kvm_vcpu_ioctl,
2755 .mmap = kvm_vcpu_mmap,
2756 .llseek = noop_llseek,
2757 KVM_COMPAT(kvm_vcpu_compat_ioctl),
2758 };
2759
2760
2761
2762
2763 static int create_vcpu_fd(struct kvm_vcpu *vcpu)
2764 {
2765 char name[8 + 1 + ITOA_MAX_LEN + 1];
2766
2767 snprintf(name, sizeof(name), "kvm-vcpu:%d", vcpu->vcpu_id);
2768 return anon_inode_getfd(name, &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC);
2769 }
2770
2771 static void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2772 {
2773 #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS
2774 char dir_name[ITOA_MAX_LEN * 2];
2775
2776 if (!debugfs_initialized())
2777 return;
2778
2779 snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id);
2780 vcpu->debugfs_dentry = debugfs_create_dir(dir_name,
2781 vcpu->kvm->debugfs_dentry);
2782
2783 kvm_arch_create_vcpu_debugfs(vcpu);
2784 #endif
2785 }
2786
2787
2788
2789
2790 static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
2791 {
2792 int r;
2793 struct kvm_vcpu *vcpu;
2794
2795 if (id >= KVM_MAX_VCPU_ID)
2796 return -EINVAL;
2797
2798 mutex_lock(&kvm->lock);
2799 if (kvm->created_vcpus == KVM_MAX_VCPUS) {
2800 mutex_unlock(&kvm->lock);
2801 return -EINVAL;
2802 }
2803
2804 kvm->created_vcpus++;
2805 mutex_unlock(&kvm->lock);
2806
2807 vcpu = kvm_arch_vcpu_create(kvm, id);
2808 if (IS_ERR(vcpu)) {
2809 r = PTR_ERR(vcpu);
2810 goto vcpu_decrement;
2811 }
2812
2813 preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
2814
2815 r = kvm_arch_vcpu_setup(vcpu);
2816 if (r)
2817 goto vcpu_destroy;
2818
2819 kvm_create_vcpu_debugfs(vcpu);
2820
2821 mutex_lock(&kvm->lock);
2822 if (kvm_get_vcpu_by_id(kvm, id)) {
2823 r = -EEXIST;
2824 goto unlock_vcpu_destroy;
2825 }
2826
2827 BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]);
2828
2829
2830 kvm_get_kvm(kvm);
2831 r = create_vcpu_fd(vcpu);
2832 if (r < 0) {
2833 kvm_put_kvm(kvm);
2834 goto unlock_vcpu_destroy;
2835 }
2836
2837 kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu;
2838
2839
2840
2841
2842
2843 smp_wmb();
2844 atomic_inc(&kvm->online_vcpus);
2845
2846 mutex_unlock(&kvm->lock);
2847 kvm_arch_vcpu_postcreate(vcpu);
2848 return r;
2849
2850 unlock_vcpu_destroy:
2851 mutex_unlock(&kvm->lock);
2852 debugfs_remove_recursive(vcpu->debugfs_dentry);
2853 vcpu_destroy:
2854 kvm_arch_vcpu_destroy(vcpu);
2855 vcpu_decrement:
2856 mutex_lock(&kvm->lock);
2857 kvm->created_vcpus--;
2858 mutex_unlock(&kvm->lock);
2859 return r;
2860 }
2861
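/*
 * Illustration (not part of this file): KVM_CREATE_VCPU returns an fd whose
 * mmap window is served by kvm_vcpu_fault() above - page 0 is struct kvm_run,
 * with extra pages (e.g. the x86 PIO data page) behind it.  A hedged
 * userspace sketch; kvm_fd and vm_fd are assumed to come from /dev/kvm and
 * KVM_CREATE_VM, error handling is trimmed.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stddef.h>

static struct kvm_run *demo_create_vcpu(int kvm_fd, int vm_fd, int *vcpu_fd_out)
{
	int vcpu_fd, mmap_size;
	struct kvm_run *run;

	vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);	/* vcpu id 0 */
	if (vcpu_fd < 0)
		return NULL;

	/* Size covers kvm_run plus the extra pages kvm_vcpu_fault() serves. */
	mmap_size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
	if (mmap_size < (int)sizeof(*run))
		return NULL;

	run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   vcpu_fd, 0);
	if (run == MAP_FAILED)
		return NULL;

	*vcpu_fd_out = vcpu_fd;
	return run;
}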
2862 static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
2863 {
2864 if (sigset) {
2865 sigdelsetmask(sigset, sigmask(SIGKILL)|sigmask(SIGSTOP));
2866 vcpu->sigset_active = 1;
2867 vcpu->sigset = *sigset;
2868 } else
2869 vcpu->sigset_active = 0;
2870 return 0;
2871 }
2872
2873 static long kvm_vcpu_ioctl(struct file *filp,
2874 unsigned int ioctl, unsigned long arg)
2875 {
2876 struct kvm_vcpu *vcpu = filp->private_data;
2877 void __user *argp = (void __user *)arg;
2878 int r;
2879 struct kvm_fpu *fpu = NULL;
2880 struct kvm_sregs *kvm_sregs = NULL;
2881
2882 if (vcpu->kvm->mm != current->mm)
2883 return -EIO;
2884
2885 if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
2886 return -EINVAL;
2887
2888
2889
2890
2891
2892 r = kvm_arch_vcpu_async_ioctl(filp, ioctl, arg);
2893 if (r != -ENOIOCTLCMD)
2894 return r;
2895
2896 if (mutex_lock_killable(&vcpu->mutex))
2897 return -EINTR;
2898 switch (ioctl) {
2899 case KVM_RUN: {
2900 struct pid *oldpid;
2901 r = -EINVAL;
2902 if (arg)
2903 goto out;
2904 oldpid = rcu_access_pointer(vcpu->pid);
2905 if (unlikely(oldpid != task_pid(current))) {
2906
2907 struct pid *newpid;
2908
2909 r = kvm_arch_vcpu_run_pid_change(vcpu);
2910 if (r)
2911 break;
2912
2913 newpid = get_task_pid(current, PIDTYPE_PID);
2914 rcu_assign_pointer(vcpu->pid, newpid);
2915 if (oldpid)
2916 synchronize_rcu();
2917 put_pid(oldpid);
2918 }
2919 r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
2920 trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
2921 break;
2922 }
2923 case KVM_GET_REGS: {
2924 struct kvm_regs *kvm_regs;
2925
2926 r = -ENOMEM;
2927 kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL_ACCOUNT);
2928 if (!kvm_regs)
2929 goto out;
2930 r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs);
2931 if (r)
2932 goto out_free1;
2933 r = -EFAULT;
2934 if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs)))
2935 goto out_free1;
2936 r = 0;
2937 out_free1:
2938 kfree(kvm_regs);
2939 break;
2940 }
2941 case KVM_SET_REGS: {
2942 struct kvm_regs *kvm_regs;
2943
2944 r = -ENOMEM;
2945 kvm_regs = memdup_user(argp, sizeof(*kvm_regs));
2946 if (IS_ERR(kvm_regs)) {
2947 r = PTR_ERR(kvm_regs);
2948 goto out;
2949 }
2950 r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs);
2951 kfree(kvm_regs);
2952 break;
2953 }
2954 case KVM_GET_SREGS: {
2955 kvm_sregs = kzalloc(sizeof(struct kvm_sregs),
2956 GFP_KERNEL_ACCOUNT);
2957 r = -ENOMEM;
2958 if (!kvm_sregs)
2959 goto out;
2960 r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs);
2961 if (r)
2962 goto out;
2963 r = -EFAULT;
2964 if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs)))
2965 goto out;
2966 r = 0;
2967 break;
2968 }
2969 case KVM_SET_SREGS: {
2970 kvm_sregs = memdup_user(argp, sizeof(*kvm_sregs));
2971 if (IS_ERR(kvm_sregs)) {
2972 r = PTR_ERR(kvm_sregs);
2973 kvm_sregs = NULL;
2974 goto out;
2975 }
2976 r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs);
2977 break;
2978 }
2979 case KVM_GET_MP_STATE: {
2980 struct kvm_mp_state mp_state;
2981
2982 r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state);
2983 if (r)
2984 goto out;
2985 r = -EFAULT;
2986 if (copy_to_user(argp, &mp_state, sizeof(mp_state)))
2987 goto out;
2988 r = 0;
2989 break;
2990 }
2991 case KVM_SET_MP_STATE: {
2992 struct kvm_mp_state mp_state;
2993
2994 r = -EFAULT;
2995 if (copy_from_user(&mp_state, argp, sizeof(mp_state)))
2996 goto out;
2997 r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state);
2998 break;
2999 }
3000 case KVM_TRANSLATE: {
3001 struct kvm_translation tr;
3002
3003 r = -EFAULT;
3004 if (copy_from_user(&tr, argp, sizeof(tr)))
3005 goto out;
3006 r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr);
3007 if (r)
3008 goto out;
3009 r = -EFAULT;
3010 if (copy_to_user(argp, &tr, sizeof(tr)))
3011 goto out;
3012 r = 0;
3013 break;
3014 }
3015 case KVM_SET_GUEST_DEBUG: {
3016 struct kvm_guest_debug dbg;
3017
3018 r = -EFAULT;
3019 if (copy_from_user(&dbg, argp, sizeof(dbg)))
3020 goto out;
3021 r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg);
3022 break;
3023 }
3024 case KVM_SET_SIGNAL_MASK: {
3025 struct kvm_signal_mask __user *sigmask_arg = argp;
3026 struct kvm_signal_mask kvm_sigmask;
3027 sigset_t sigset, *p;
3028
3029 p = NULL;
3030 if (argp) {
3031 r = -EFAULT;
3032 if (copy_from_user(&kvm_sigmask, argp,
3033 sizeof(kvm_sigmask)))
3034 goto out;
3035 r = -EINVAL;
3036 if (kvm_sigmask.len != sizeof(sigset))
3037 goto out;
3038 r = -EFAULT;
3039 if (copy_from_user(&sigset, sigmask_arg->sigset,
3040 sizeof(sigset)))
3041 goto out;
3042 p = &sigset;
3043 }
3044 r = kvm_vcpu_ioctl_set_sigmask(vcpu, p);
3045 break;
3046 }
3047 case KVM_GET_FPU: {
3048 fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL_ACCOUNT);
3049 r = -ENOMEM;
3050 if (!fpu)
3051 goto out;
3052 r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu);
3053 if (r)
3054 goto out;
3055 r = -EFAULT;
3056 if (copy_to_user(argp, fpu, sizeof(struct kvm_fpu)))
3057 goto out;
3058 r = 0;
3059 break;
3060 }
3061 case KVM_SET_FPU: {
3062 fpu = memdup_user(argp, sizeof(*fpu));
3063 if (IS_ERR(fpu)) {
3064 r = PTR_ERR(fpu);
3065 fpu = NULL;
3066 goto out;
3067 }
3068 r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu);
3069 break;
3070 }
3071 default:
3072 r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
3073 }
3074 out:
3075 mutex_unlock(&vcpu->mutex);
3076 kfree(fpu);
3077 kfree(kvm_sregs);
3078 return r;
3079 }
3080
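/*
 * Illustration (not part of this file): the KVM_RUN case above rejects a
 * non-zero argument and returns once the guest exits, with the reason in
 * run->exit_reason.  A hedged userspace sketch of the corresponding run
 * loop; vcpu_fd and run come from the KVM_CREATE_VCPU/mmap sketch earlier,
 * and only two exit reasons are handled.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdio.h>

static int demo_run_loop(int vcpu_fd, struct kvm_run *run)
{
	for (;;) {
		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
			return -1;		/* e.g. interrupted by a signal */

		switch (run->exit_reason) {
		case KVM_EXIT_HLT:
			return 0;		/* guest executed HLT */
		case KVM_EXIT_IO:
			printf("port I/O: port 0x%x, %s\n", run->io.port,
			       run->io.direction == KVM_EXIT_IO_OUT ? "out" : "in");
			break;			/* data lives at run + run->io.data_offset */
		default:
			fprintf(stderr, "unhandled exit %u\n", run->exit_reason);
			return -1;
		}
	}
}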
3081 #ifdef CONFIG_KVM_COMPAT
3082 static long kvm_vcpu_compat_ioctl(struct file *filp,
3083 unsigned int ioctl, unsigned long arg)
3084 {
3085 struct kvm_vcpu *vcpu = filp->private_data;
3086 void __user *argp = compat_ptr(arg);
3087 int r;
3088
3089 if (vcpu->kvm->mm != current->mm)
3090 return -EIO;
3091
3092 switch (ioctl) {
3093 case KVM_SET_SIGNAL_MASK: {
3094 struct kvm_signal_mask __user *sigmask_arg = argp;
3095 struct kvm_signal_mask kvm_sigmask;
3096 sigset_t sigset;
3097
3098 if (argp) {
3099 r = -EFAULT;
3100 if (copy_from_user(&kvm_sigmask, argp,
3101 sizeof(kvm_sigmask)))
3102 goto out;
3103 r = -EINVAL;
3104 if (kvm_sigmask.len != sizeof(compat_sigset_t))
3105 goto out;
3106 r = -EFAULT;
3107 if (get_compat_sigset(&sigset, (void *)sigmask_arg->sigset))
3108 goto out;
3109 r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset);
3110 } else
3111 r = kvm_vcpu_ioctl_set_sigmask(vcpu, NULL);
3112 break;
3113 }
3114 default:
3115 r = kvm_vcpu_ioctl(filp, ioctl, arg);
3116 }
3117
3118 out:
3119 return r;
3120 }
3121 #endif
3122
3123 static int kvm_device_mmap(struct file *filp, struct vm_area_struct *vma)
3124 {
3125 struct kvm_device *dev = filp->private_data;
3126
3127 if (dev->ops->mmap)
3128 return dev->ops->mmap(dev, vma);
3129
3130 return -ENODEV;
3131 }
3132
3133 static int kvm_device_ioctl_attr(struct kvm_device *dev,
3134 int (*accessor)(struct kvm_device *dev,
3135 struct kvm_device_attr *attr),
3136 unsigned long arg)
3137 {
3138 struct kvm_device_attr attr;
3139
3140 if (!accessor)
3141 return -EPERM;
3142
3143 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
3144 return -EFAULT;
3145
3146 return accessor(dev, &attr);
3147 }
3148
3149 static long kvm_device_ioctl(struct file *filp, unsigned int ioctl,
3150 unsigned long arg)
3151 {
3152 struct kvm_device *dev = filp->private_data;
3153
3154 if (dev->kvm->mm != current->mm)
3155 return -EIO;
3156
3157 switch (ioctl) {
3158 case KVM_SET_DEVICE_ATTR:
3159 return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg);
3160 case KVM_GET_DEVICE_ATTR:
3161 return kvm_device_ioctl_attr(dev, dev->ops->get_attr, arg);
3162 case KVM_HAS_DEVICE_ATTR:
3163 return kvm_device_ioctl_attr(dev, dev->ops->has_attr, arg);
3164 default:
3165 if (dev->ops->ioctl)
3166 return dev->ops->ioctl(dev, ioctl, arg);
3167
3168 return -ENOTTY;
3169 }
3170 }
3171
3172 static int kvm_device_release(struct inode *inode, struct file *filp)
3173 {
3174 struct kvm_device *dev = filp->private_data;
3175 struct kvm *kvm = dev->kvm;
3176
3177 if (dev->ops->release) {
3178 mutex_lock(&kvm->lock);
3179 list_del(&dev->vm_node);
3180 dev->ops->release(dev);
3181 mutex_unlock(&kvm->lock);
3182 }
3183
3184 kvm_put_kvm(kvm);
3185 return 0;
3186 }
3187
3188 static const struct file_operations kvm_device_fops = {
3189 .unlocked_ioctl = kvm_device_ioctl,
3190 .release = kvm_device_release,
3191 KVM_COMPAT(kvm_device_ioctl),
3192 .mmap = kvm_device_mmap,
3193 };
3194
3195 struct kvm_device *kvm_device_from_filp(struct file *filp)
3196 {
3197 if (filp->f_op != &kvm_device_fops)
3198 return NULL;
3199
3200 return filp->private_data;
3201 }
3202
3203 static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
3204 #ifdef CONFIG_KVM_MPIC
3205 [KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops,
3206 [KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops,
3207 #endif
3208 };
3209
3210 int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
3211 {
3212 if (type >= ARRAY_SIZE(kvm_device_ops_table))
3213 return -ENOSPC;
3214
3215 if (kvm_device_ops_table[type] != NULL)
3216 return -EEXIST;
3217
3218 kvm_device_ops_table[type] = ops;
3219 return 0;
3220 }
3221
3222 void kvm_unregister_device_ops(u32 type)
3223 {
3224 if (kvm_device_ops_table[type] != NULL)
3225 kvm_device_ops_table[type] = NULL;
3226 }
3227
3228 static int kvm_ioctl_create_device(struct kvm *kvm,
3229 struct kvm_create_device *cd)
3230 {
3231 struct kvm_device_ops *ops = NULL;
3232 struct kvm_device *dev;
3233 bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
3234 int type;
3235 int ret;
3236
3237 if (cd->type >= ARRAY_SIZE(kvm_device_ops_table))
3238 return -ENODEV;
3239
3240 type = array_index_nospec(cd->type, ARRAY_SIZE(kvm_device_ops_table));
3241 ops = kvm_device_ops_table[type];
3242 if (ops == NULL)
3243 return -ENODEV;
3244
3245 if (test)
3246 return 0;
3247
3248 dev = kzalloc(sizeof(*dev), GFP_KERNEL_ACCOUNT);
3249 if (!dev)
3250 return -ENOMEM;
3251
3252 dev->ops = ops;
3253 dev->kvm = kvm;
3254
3255 mutex_lock(&kvm->lock);
3256 ret = ops->create(dev, type);
3257 if (ret < 0) {
3258 mutex_unlock(&kvm->lock);
3259 kfree(dev);
3260 return ret;
3261 }
3262 list_add(&dev->vm_node, &kvm->devices);
3263 mutex_unlock(&kvm->lock);
3264
3265 if (ops->init)
3266 ops->init(dev);
3267
3268 kvm_get_kvm(kvm);
3269 ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC);
3270 if (ret < 0) {
3271 kvm_put_kvm(kvm);
3272 mutex_lock(&kvm->lock);
3273 list_del(&dev->vm_node);
3274 mutex_unlock(&kvm->lock);
3275 ops->destroy(dev);
3276 return ret;
3277 }
3278
3279 cd->fd = ret;
3280 return 0;
3281 }
3282
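/*
 * Illustration (not part of this file): KVM_CREATE_DEVICE with
 * KVM_CREATE_DEVICE_TEST only probes kvm_device_ops_table; without the flag
 * it returns a device fd that then accepts KVM_{SET,GET,HAS}_DEVICE_ATTR.
 * A hedged userspace sketch; 'type' is assumed to be a KVM_DEV_TYPE_* value
 * supported on the host.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int demo_create_device(int vm_fd, unsigned int type)
{
	struct kvm_create_device cd = {
		.type = type,
		.flags = KVM_CREATE_DEVICE_TEST,	/* dry run: fails if unsupported */
	};

	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;

	cd.flags = 0;					/* now create it for real */
	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;
	return cd.fd;	/* use KVM_{SET,GET,HAS}_DEVICE_ATTR on this fd */
}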
3283 static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
3284 {
3285 switch (arg) {
3286 case KVM_CAP_USER_MEMORY:
3287 case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
3288 case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
3289 case KVM_CAP_INTERNAL_ERROR_DATA:
3290 #ifdef CONFIG_HAVE_KVM_MSI
3291 case KVM_CAP_SIGNAL_MSI:
3292 #endif
3293 #ifdef CONFIG_HAVE_KVM_IRQFD
3294 case KVM_CAP_IRQFD:
3295 case KVM_CAP_IRQFD_RESAMPLE:
3296 #endif
3297 case KVM_CAP_IOEVENTFD_ANY_LENGTH:
3298 case KVM_CAP_CHECK_EXTENSION_VM:
3299 case KVM_CAP_ENABLE_CAP_VM:
3300 #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
3301 case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2:
3302 #endif
3303 return 1;
3304 #ifdef CONFIG_KVM_MMIO
3305 case KVM_CAP_COALESCED_MMIO:
3306 return KVM_COALESCED_MMIO_PAGE_OFFSET;
3307 case KVM_CAP_COALESCED_PIO:
3308 return 1;
3309 #endif
3310 #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
3311 case KVM_CAP_IRQ_ROUTING:
3312 return KVM_MAX_IRQ_ROUTES;
3313 #endif
3314 #if KVM_ADDRESS_SPACE_NUM > 1
3315 case KVM_CAP_MULTI_ADDRESS_SPACE:
3316 return KVM_ADDRESS_SPACE_NUM;
3317 #endif
3318 case KVM_CAP_NR_MEMSLOTS:
3319 return KVM_USER_MEM_SLOTS;
3320 default:
3321 break;
3322 }
3323 return kvm_vm_ioctl_check_extension(kvm, arg);
3324 }
3325
3326 int __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm,
3327 struct kvm_enable_cap *cap)
3328 {
3329 return -EINVAL;
3330 }
3331
3332 static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
3333 struct kvm_enable_cap *cap)
3334 {
3335 switch (cap->cap) {
3336 #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
3337 case KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2:
3338 if (cap->flags || (cap->args[0] & ~1))
3339 return -EINVAL;
3340 kvm->manual_dirty_log_protect = cap->args[0];
3341 return 0;
3342 #endif
3343 default:
3344 return kvm_vm_ioctl_enable_cap(kvm, cap);
3345 }
3346 }
3347
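/*
 * Illustration (not part of this file): capabilities listed in
 * kvm_vm_ioctl_check_extension_generic() report non-zero from
 * KVM_CHECK_EXTENSION, and VM-scoped ones such as
 * KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 are then switched on through
 * KVM_ENABLE_CAP, where only bit 0 of args[0] is accepted by the handler
 * above.  A hedged userspace sketch; vm_fd is assumed to come from
 * KVM_CREATE_VM.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int demo_enable_manual_dirty_log(int vm_fd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
		.args = { 1 },	/* bit 0 only, see the (cap->args[0] & ~1) check */
	};

	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2) <= 0)
		return 0;	/* not offered by this kernel; carry on without it */

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}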
3348 static long kvm_vm_ioctl(struct file *filp,
3349 unsigned int ioctl, unsigned long arg)
3350 {
3351 struct kvm *kvm = filp->private_data;
3352 void __user *argp = (void __user *)arg;
3353 int r;
3354
3355 if (kvm->mm != current->mm)
3356 return -EIO;
3357 switch (ioctl) {
3358 case KVM_CREATE_VCPU:
3359 r = kvm_vm_ioctl_create_vcpu(kvm, arg);
3360 break;
3361 case KVM_ENABLE_CAP: {
3362 struct kvm_enable_cap cap;
3363
3364 r = -EFAULT;
3365 if (copy_from_user(&cap, argp, sizeof(cap)))
3366 goto out;
3367 r = kvm_vm_ioctl_enable_cap_generic(kvm, &cap);
3368 break;
3369 }
3370 case KVM_SET_USER_MEMORY_REGION: {
3371 struct kvm_userspace_memory_region kvm_userspace_mem;
3372
3373 r = -EFAULT;
3374 if (copy_from_user(&kvm_userspace_mem, argp,
3375 sizeof(kvm_userspace_mem)))
3376 goto out;
3377
3378 r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem);
3379 break;
3380 }
3381 case KVM_GET_DIRTY_LOG: {
3382 struct kvm_dirty_log log;
3383
3384 r = -EFAULT;
3385 if (copy_from_user(&log, argp, sizeof(log)))
3386 goto out;
3387 r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
3388 break;
3389 }
3390 #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
3391 case KVM_CLEAR_DIRTY_LOG: {
3392 struct kvm_clear_dirty_log log;
3393
3394 r = -EFAULT;
3395 if (copy_from_user(&log, argp, sizeof(log)))
3396 goto out;
3397 r = kvm_vm_ioctl_clear_dirty_log(kvm, &log);
3398 break;
3399 }
3400 #endif
3401 #ifdef CONFIG_KVM_MMIO
3402 case KVM_REGISTER_COALESCED_MMIO: {
3403 struct kvm_coalesced_mmio_zone zone;
3404
3405 r = -EFAULT;
3406 if (copy_from_user(&zone, argp, sizeof(zone)))
3407 goto out;
3408 r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
3409 break;
3410 }
3411 case KVM_UNREGISTER_COALESCED_MMIO: {
3412 struct kvm_coalesced_mmio_zone zone;
3413
3414 r = -EFAULT;
3415 if (copy_from_user(&zone, argp, sizeof(zone)))
3416 goto out;
3417 r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
3418 break;
3419 }
3420 #endif
3421 case KVM_IRQFD: {
3422 struct kvm_irqfd data;
3423
3424 r = -EFAULT;
3425 if (copy_from_user(&data, argp, sizeof(data)))
3426 goto out;
3427 r = kvm_irqfd(kvm, &data);
3428 break;
3429 }
3430 case KVM_IOEVENTFD: {
3431 struct kvm_ioeventfd data;
3432
3433 r = -EFAULT;
3434 if (copy_from_user(&data, argp, sizeof(data)))
3435 goto out;
3436 r = kvm_ioeventfd(kvm, &data);
3437 break;
3438 }
3439 #ifdef CONFIG_HAVE_KVM_MSI
3440 case KVM_SIGNAL_MSI: {
3441 struct kvm_msi msi;
3442
3443 r = -EFAULT;
3444 if (copy_from_user(&msi, argp, sizeof(msi)))
3445 goto out;
3446 r = kvm_send_userspace_msi(kvm, &msi);
3447 break;
3448 }
3449 #endif
3450 #ifdef __KVM_HAVE_IRQ_LINE
3451 case KVM_IRQ_LINE_STATUS:
3452 case KVM_IRQ_LINE: {
3453 struct kvm_irq_level irq_event;
3454
3455 r = -EFAULT;
3456 if (copy_from_user(&irq_event, argp, sizeof(irq_event)))
3457 goto out;
3458
3459 r = kvm_vm_ioctl_irq_line(kvm, &irq_event,
3460 ioctl == KVM_IRQ_LINE_STATUS);
3461 if (r)
3462 goto out;
3463
3464 r = -EFAULT;
3465 if (ioctl == KVM_IRQ_LINE_STATUS) {
3466 if (copy_to_user(argp, &irq_event, sizeof(irq_event)))
3467 goto out;
3468 }
3469
3470 r = 0;
3471 break;
3472 }
3473 #endif
3474 #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
3475 case KVM_SET_GSI_ROUTING: {
3476 struct kvm_irq_routing routing;
3477 struct kvm_irq_routing __user *urouting;
3478 struct kvm_irq_routing_entry *entries = NULL;
3479
3480 r = -EFAULT;
3481 if (copy_from_user(&routing, argp, sizeof(routing)))
3482 goto out;
3483 r = -EINVAL;
3484 if (!kvm_arch_can_set_irq_routing(kvm))
3485 goto out;
3486 if (routing.nr > KVM_MAX_IRQ_ROUTES)
3487 goto out;
3488 if (routing.flags)
3489 goto out;
3490 if (routing.nr) {
3491 r = -ENOMEM;
3492 entries = vmalloc(array_size(sizeof(*entries),
3493 routing.nr));
3494 if (!entries)
3495 goto out;
3496 r = -EFAULT;
3497 urouting = argp;
3498 if (copy_from_user(entries, urouting->entries,
3499 routing.nr * sizeof(*entries)))
3500 goto out_free_irq_routing;
3501 }
3502 r = kvm_set_irq_routing(kvm, entries, routing.nr,
3503 routing.flags);
3504 out_free_irq_routing:
3505 vfree(entries);
3506 break;
3507 }
3508 #endif
3509 case KVM_CREATE_DEVICE: {
3510 struct kvm_create_device cd;
3511
3512 r = -EFAULT;
3513 if (copy_from_user(&cd, argp, sizeof(cd)))
3514 goto out;
3515
3516 r = kvm_ioctl_create_device(kvm, &cd);
3517 if (r)
3518 goto out;
3519
3520 r = -EFAULT;
3521 if (copy_to_user(argp, &cd, sizeof(cd)))
3522 goto out;
3523
3524 r = 0;
3525 break;
3526 }
3527 case KVM_CHECK_EXTENSION:
3528 r = kvm_vm_ioctl_check_extension_generic(kvm, arg);
3529 break;
3530 default:
3531 r = kvm_arch_vm_ioctl(filp, ioctl, arg);
3532 }
3533 out:
3534 return r;
3535 }
3536
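/*
 * Illustration (not part of this file): the KVM_SET_USER_MEMORY_REGION case
 * above copies a struct kvm_userspace_memory_region from userspace and hands
 * it to kvm_vm_ioctl_set_memory_region().  A hedged userspace sketch that
 * backs 2 MiB of guest-physical space at gpa 0 with anonymous host memory;
 * slot 0 and the sizes are illustrative choices.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

static void *demo_add_memslot(int vm_fd)
{
	const size_t size = 2u << 20;
	void *host_mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
			      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	struct kvm_userspace_memory_region region = {
		.slot = 0,
		.flags = 0,			/* or KVM_MEM_LOG_DIRTY_PAGES */
		.guest_phys_addr = 0,
		.memory_size = size,
		.userspace_addr = (unsigned long)host_mem,
	};

	if (host_mem == MAP_FAILED)
		return NULL;
	if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region) < 0) {
		munmap(host_mem, size);
		return NULL;
	}
	return host_mem;
}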
3537 #ifdef CONFIG_KVM_COMPAT
3538 struct compat_kvm_dirty_log {
3539 __u32 slot;
3540 __u32 padding1;
3541 union {
3542 compat_uptr_t dirty_bitmap;
3543 __u64 padding2;
3544 };
3545 };
3546
3547 static long kvm_vm_compat_ioctl(struct file *filp,
3548 unsigned int ioctl, unsigned long arg)
3549 {
3550 struct kvm *kvm = filp->private_data;
3551 int r;
3552
3553 if (kvm->mm != current->mm)
3554 return -EIO;
3555 switch (ioctl) {
3556 case KVM_GET_DIRTY_LOG: {
3557 struct compat_kvm_dirty_log compat_log;
3558 struct kvm_dirty_log log;
3559
3560 if (copy_from_user(&compat_log, (void __user *)arg,
3561 sizeof(compat_log)))
3562 return -EFAULT;
3563 log.slot = compat_log.slot;
3564 log.padding1 = compat_log.padding1;
3565 log.padding2 = compat_log.padding2;
3566 log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap);
3567
3568 r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
3569 break;
3570 }
3571 default:
3572 r = kvm_vm_ioctl(filp, ioctl, arg);
3573 }
3574 return r;
3575 }
3576 #endif
3577
3578 static struct file_operations kvm_vm_fops = {
3579 .release = kvm_vm_release,
3580 .unlocked_ioctl = kvm_vm_ioctl,
3581 .llseek = noop_llseek,
3582 KVM_COMPAT(kvm_vm_compat_ioctl),
3583 };
3584
3585 static int kvm_dev_ioctl_create_vm(unsigned long type)
3586 {
3587 int r;
3588 struct kvm *kvm;
3589 struct file *file;
3590
3591 kvm = kvm_create_vm(type);
3592 if (IS_ERR(kvm))
3593 return PTR_ERR(kvm);
3594 #ifdef CONFIG_KVM_MMIO
3595 r = kvm_coalesced_mmio_init(kvm);
3596 if (r < 0)
3597 goto put_kvm;
3598 #endif
3599 r = get_unused_fd_flags(O_CLOEXEC);
3600 if (r < 0)
3601 goto put_kvm;
3602
3603 file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
3604 if (IS_ERR(file)) {
3605 put_unused_fd(r);
3606 r = PTR_ERR(file);
3607 goto put_kvm;
3608 }
3609
3610
3611
3612
3613
3614
3615
3616 if (kvm_create_vm_debugfs(kvm, r) < 0) {
3617 put_unused_fd(r);
3618 fput(file);
3619 return -ENOMEM;
3620 }
3621 kvm_uevent_notify_change(KVM_EVENT_CREATE_VM, kvm);
3622
3623 fd_install(r, file);
3624 return r;
3625
3626 put_kvm:
3627 kvm_put_kvm(kvm);
3628 return r;
3629 }
3630
3631 static long kvm_dev_ioctl(struct file *filp,
3632 unsigned int ioctl, unsigned long arg)
3633 {
3634 long r = -EINVAL;
3635
3636 switch (ioctl) {
3637 case KVM_GET_API_VERSION:
3638 if (arg)
3639 goto out;
3640 r = KVM_API_VERSION;
3641 break;
3642 case KVM_CREATE_VM:
3643 r = kvm_dev_ioctl_create_vm(arg);
3644 break;
3645 case KVM_CHECK_EXTENSION:
3646 r = kvm_vm_ioctl_check_extension_generic(NULL, arg);
3647 break;
3648 case KVM_GET_VCPU_MMAP_SIZE:
3649 if (arg)
3650 goto out;
3651 r = PAGE_SIZE;
3652 #ifdef CONFIG_X86
3653 r += PAGE_SIZE;
3654 #endif
3655 #ifdef CONFIG_KVM_MMIO
3656 r += PAGE_SIZE;
3657 #endif
3658 break;
3659 case KVM_TRACE_ENABLE:
3660 case KVM_TRACE_PAUSE:
3661 case KVM_TRACE_DISABLE:
3662 r = -EOPNOTSUPP;
3663 break;
3664 default:
3665 return kvm_arch_dev_ioctl(filp, ioctl, arg);
3666 }
3667 out:
3668 return r;
3669 }
3670
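/*
 * Illustration (not part of this file): the /dev/kvm fd served by
 * kvm_chardev_ops answers KVM_GET_API_VERSION (pinned at 12) and hands out
 * VM fds via KVM_CREATE_VM, as implemented in kvm_dev_ioctl() above.  A
 * hedged userspace sketch; machine type 0 is the default and error paths
 * are kept minimal.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <unistd.h>

static int demo_open_vm(int *kvm_fd_out)
{
	int kvm_fd = open("/dev/kvm", O_RDWR | O_CLOEXEC);
	int vm_fd;

	if (kvm_fd < 0)
		return -1;
	if (ioctl(kvm_fd, KVM_GET_API_VERSION, 0) != KVM_API_VERSION) {
		close(kvm_fd);
		return -1;	/* unexpected ABI version */
	}
	vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);
	if (vm_fd < 0) {
		close(kvm_fd);
		return -1;
	}
	*kvm_fd_out = kvm_fd;
	return vm_fd;
}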
3671 static struct file_operations kvm_chardev_ops = {
3672 .unlocked_ioctl = kvm_dev_ioctl,
3673 .llseek = noop_llseek,
3674 KVM_COMPAT(kvm_dev_ioctl),
3675 };
3676
3677 static struct miscdevice kvm_dev = {
3678 KVM_MINOR,
3679 "kvm",
3680 &kvm_chardev_ops,
3681 };
3682
3683 static void hardware_enable_nolock(void *junk)
3684 {
3685 int cpu = raw_smp_processor_id();
3686 int r;
3687
3688 if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
3689 return;
3690
3691 cpumask_set_cpu(cpu, cpus_hardware_enabled);
3692
3693 r = kvm_arch_hardware_enable();
3694
3695 if (r) {
3696 cpumask_clear_cpu(cpu, cpus_hardware_enabled);
3697 atomic_inc(&hardware_enable_failed);
3698 pr_info("kvm: enabling virtualization on CPU%d failed\n", cpu);
3699 }
3700 }
3701
3702 static int kvm_starting_cpu(unsigned int cpu)
3703 {
3704 raw_spin_lock(&kvm_count_lock);
3705 if (kvm_usage_count)
3706 hardware_enable_nolock(NULL);
3707 raw_spin_unlock(&kvm_count_lock);
3708 return 0;
3709 }
3710
3711 static void hardware_disable_nolock(void *junk)
3712 {
3713 int cpu = raw_smp_processor_id();
3714
3715 if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
3716 return;
3717 cpumask_clear_cpu(cpu, cpus_hardware_enabled);
3718 kvm_arch_hardware_disable();
3719 }
3720
3721 static int kvm_dying_cpu(unsigned int cpu)
3722 {
3723 raw_spin_lock(&kvm_count_lock);
3724 if (kvm_usage_count)
3725 hardware_disable_nolock(NULL);
3726 raw_spin_unlock(&kvm_count_lock);
3727 return 0;
3728 }
3729
3730 static void hardware_disable_all_nolock(void)
3731 {
3732 BUG_ON(!kvm_usage_count);
3733
3734 kvm_usage_count--;
3735 if (!kvm_usage_count)
3736 on_each_cpu(hardware_disable_nolock, NULL, 1);
3737 }
3738
3739 static void hardware_disable_all(void)
3740 {
3741 raw_spin_lock(&kvm_count_lock);
3742 hardware_disable_all_nolock();
3743 raw_spin_unlock(&kvm_count_lock);
3744 }
3745
3746 static int hardware_enable_all(void)
3747 {
3748 int r = 0;
3749
3750 raw_spin_lock(&kvm_count_lock);
3751
3752 kvm_usage_count++;
3753 if (kvm_usage_count == 1) {
3754 atomic_set(&hardware_enable_failed, 0);
3755 on_each_cpu(hardware_enable_nolock, NULL, 1);
3756
3757 if (atomic_read(&hardware_enable_failed)) {
3758 hardware_disable_all_nolock();
3759 r = -EBUSY;
3760 }
3761 }
3762
3763 raw_spin_unlock(&kvm_count_lock);
3764
3765 return r;
3766 }
3767
3768 static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
3769 void *v)
3770 {
3771
3772
3773
3774
3775
3776
3777 pr_info("kvm: exiting hardware virtualization\n");
3778 kvm_rebooting = true;
3779 on_each_cpu(hardware_disable_nolock, NULL, 1);
3780 return NOTIFY_OK;
3781 }
3782
3783 static struct notifier_block kvm_reboot_notifier = {
3784 .notifier_call = kvm_reboot,
3785 .priority = 0,
3786 };
3787
3788 static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
3789 {
3790 int i;
3791
3792 for (i = 0; i < bus->dev_count; i++) {
3793 struct kvm_io_device *pos = bus->range[i].dev;
3794
3795 kvm_iodevice_destructor(pos);
3796 }
3797 kfree(bus);
3798 }
3799
3800 static inline int kvm_io_bus_cmp(const struct kvm_io_range *r1,
3801 const struct kvm_io_range *r2)
3802 {
3803 gpa_t addr1 = r1->addr;
3804 gpa_t addr2 = r2->addr;
3805
3806 if (addr1 < addr2)
3807 return -1;
3808
3809
3810
3811
3812
3813
3814 if (r2->len) {
3815 addr1 += r1->len;
3816 addr2 += r2->len;
3817 }
3818
3819 if (addr1 > addr2)
3820 return 1;
3821
3822 return 0;
3823 }
3824
3825 static int kvm_io_bus_sort_cmp(const void *p1, const void *p2)
3826 {
3827 return kvm_io_bus_cmp(p1, p2);
3828 }
3829
3830 static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus,
3831 gpa_t addr, int len)
3832 {
3833 struct kvm_io_range *range, key;
3834 int off;
3835
3836 key = (struct kvm_io_range) {
3837 .addr = addr,
3838 .len = len,
3839 };
3840
3841 range = bsearch(&key, bus->range, bus->dev_count,
3842 sizeof(struct kvm_io_range), kvm_io_bus_sort_cmp);
3843 if (range == NULL)
3844 return -ENOENT;
3845
3846 off = range - bus->range;
3847
3848 while (off > 0 && kvm_io_bus_cmp(&key, &bus->range[off-1]) == 0)
3849 off--;
3850
3851 return off;
3852 }
3853
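/*
 * Illustration (not part of this file): registration keeps bus->range sorted
 * by kvm_io_bus_cmp(), so the lookup above is "bsearch for any matching
 * entry, then walk back to the first one", since several registered ranges
 * may compare equal to the key.  A stand-alone sketch of that pattern over a
 * plain sorted int array:
 */
#include <stdlib.h>

static int demo_int_cmp(const void *a, const void *b)
{
	int x = *(const int *)a, y = *(const int *)b;

	return (x > y) - (x < y);
}

/* Return the index of the first element equal to key, or -1. */
static int demo_first_match(const int *sorted, size_t n, int key)
{
	const int *hit = bsearch(&key, sorted, n, sizeof(*sorted), demo_int_cmp);
	size_t off;

	if (!hit)
		return -1;
	off = hit - sorted;
	while (off > 0 && sorted[off - 1] == key)
		off--;		/* bsearch may land on any of the duplicates */
	return (int)off;
}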
3854 static int __kvm_io_bus_write(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus,
3855 struct kvm_io_range *range, const void *val)
3856 {
3857 int idx;
3858
3859 idx = kvm_io_bus_get_first_dev(bus, range->addr, range->len);
3860 if (idx < 0)
3861 return -EOPNOTSUPP;
3862
3863 while (idx < bus->dev_count &&
3864 kvm_io_bus_cmp(range, &bus->range[idx]) == 0) {
3865 if (!kvm_iodevice_write(vcpu, bus->range[idx].dev, range->addr,
3866 range->len, val))
3867 return idx;
3868 idx++;
3869 }
3870
3871 return -EOPNOTSUPP;
3872 }
3873
3874
3875 int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
3876 int len, const void *val)
3877 {
3878 struct kvm_io_bus *bus;
3879 struct kvm_io_range range;
3880 int r;
3881
3882 range = (struct kvm_io_range) {
3883 .addr = addr,
3884 .len = len,
3885 };
3886
3887 bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
3888 if (!bus)
3889 return -ENOMEM;
3890 r = __kvm_io_bus_write(vcpu, bus, &range, val);
3891 return r < 0 ? r : 0;
3892 }
3893 EXPORT_SYMBOL_GPL(kvm_io_bus_write);
3894
3895
3896 int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
3897 gpa_t addr, int len, const void *val, long cookie)
3898 {
3899 struct kvm_io_bus *bus;
3900 struct kvm_io_range range;
3901
3902 range = (struct kvm_io_range) {
3903 .addr = addr,
3904 .len = len,
3905 };
3906
3907 bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
3908 if (!bus)
3909 return -ENOMEM;
3910
3911
3912 if ((cookie >= 0) && (cookie < bus->dev_count) &&
3913 (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0))
3914 if (!kvm_iodevice_write(vcpu, bus->range[cookie].dev, addr, len,
3915 val))
3916 return cookie;
3917
3918
3919
3920
3921
3922 return __kvm_io_bus_write(vcpu, bus, &range, val);
3923 }
3924
3925 static int __kvm_io_bus_read(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus,
3926 struct kvm_io_range *range, void *val)
3927 {
3928 int idx;
3929
3930 idx = kvm_io_bus_get_first_dev(bus, range->addr, range->len);
3931 if (idx < 0)
3932 return -EOPNOTSUPP;
3933
3934 while (idx < bus->dev_count &&
3935 kvm_io_bus_cmp(range, &bus->range[idx]) == 0) {
3936 if (!kvm_iodevice_read(vcpu, bus->range[idx].dev, range->addr,
3937 range->len, val))
3938 return idx;
3939 idx++;
3940 }
3941
3942 return -EOPNOTSUPP;
3943 }
3944
3945
3946 int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
3947 int len, void *val)
3948 {
3949 struct kvm_io_bus *bus;
3950 struct kvm_io_range range;
3951 int r;
3952
3953 range = (struct kvm_io_range) {
3954 .addr = addr,
3955 .len = len,
3956 };
3957
3958 bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
3959 if (!bus)
3960 return -ENOMEM;
3961 r = __kvm_io_bus_read(vcpu, bus, &range, val);
3962 return r < 0 ? r : 0;
3963 }
3964
3965
3966 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
3967 int len, struct kvm_io_device *dev)
3968 {
3969 int i;
3970 struct kvm_io_bus *new_bus, *bus;
3971 struct kvm_io_range range;
3972
3973 bus = kvm_get_bus(kvm, bus_idx);
3974 if (!bus)
3975 return -ENOMEM;
3976
3977
3978 if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1)
3979 return -ENOSPC;
3980
3981 new_bus = kmalloc(struct_size(bus, range, bus->dev_count + 1),
3982 GFP_KERNEL_ACCOUNT);
3983 if (!new_bus)
3984 return -ENOMEM;
3985
3986 range = (struct kvm_io_range) {
3987 .addr = addr,
3988 .len = len,
3989 .dev = dev,
3990 };
3991
3992 for (i = 0; i < bus->dev_count; i++)
3993 if (kvm_io_bus_cmp(&bus->range[i], &range) > 0)
3994 break;
3995
3996 memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
3997 new_bus->dev_count++;
3998 new_bus->range[i] = range;
3999 memcpy(new_bus->range + i + 1, bus->range + i,
4000 (bus->dev_count - i) * sizeof(struct kvm_io_range));
4001 rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
4002 synchronize_srcu_expedited(&kvm->srcu);
4003 kfree(bus);
4004
4005 return 0;
4006 }
4007
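/*
 * Illustration (not part of this file): both bus update paths follow the
 * usual copy-then-publish recipe rather than editing the live array.  A
 * condensed sketch of that ordering, with demo_* names standing in for the
 * real registration logic:
 */
static void demo_publish_new_bus(struct kvm *kvm, int bus_idx,
				 struct kvm_io_bus *old_bus,
				 struct kvm_io_bus *new_bus)
{
	/* new_bus was filled in privately; readers still see old_bus. */
	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
	/* Wait for every SRCU reader that might still hold old_bus... */
	synchronize_srcu_expedited(&kvm->srcu);
	/* ...after which the stale copy can be freed safely. */
	kfree(old_bus);
}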
4008
4009 void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
4010 struct kvm_io_device *dev)
4011 {
4012 int i;
4013 struct kvm_io_bus *new_bus, *bus;
4014
4015 bus = kvm_get_bus(kvm, bus_idx);
4016 if (!bus)
4017 return;
4018
4019 for (i = 0; i < bus->dev_count; i++)
4020 if (bus->range[i].dev == dev) {
4021 break;
4022 }
4023
4024 if (i == bus->dev_count)
4025 return;
4026
4027 new_bus = kmalloc(struct_size(bus, range, bus->dev_count - 1),
4028 GFP_KERNEL_ACCOUNT);
4029 if (!new_bus) {
4030 pr_err("kvm: failed to shrink bus, removing it completely\n");
4031 goto broken;
4032 }
4033
4034 memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
4035 new_bus->dev_count--;
4036 memcpy(new_bus->range + i, bus->range + i + 1,
4037 (new_bus->dev_count - i) * sizeof(struct kvm_io_range));
4038
4039 broken:
4040 rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
4041 synchronize_srcu_expedited(&kvm->srcu);
4042 kfree(bus);
4043 return;
4044 }
4045
4046 struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
4047 gpa_t addr)
4048 {
4049 struct kvm_io_bus *bus;
4050 int dev_idx, srcu_idx;
4051 struct kvm_io_device *iodev = NULL;
4052
4053 srcu_idx = srcu_read_lock(&kvm->srcu);
4054
4055 bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
4056 if (!bus)
4057 goto out_unlock;
4058
4059 dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1);
4060 if (dev_idx < 0)
4061 goto out_unlock;
4062
4063 iodev = bus->range[dev_idx].dev;
4064
4065 out_unlock:
4066 srcu_read_unlock(&kvm->srcu, srcu_idx);
4067
4068 return iodev;
4069 }
4070 EXPORT_SYMBOL_GPL(kvm_io_bus_get_dev);
4071
4072 static int kvm_debugfs_open(struct inode *inode, struct file *file,
4073 int (*get)(void *, u64 *), int (*set)(void *, u64),
4074 const char *fmt)
4075 {
4076 struct kvm_stat_data *stat_data = (struct kvm_stat_data *)
4077 inode->i_private;
4078
4079
4080
4081
4082
4083
4084 if (!refcount_inc_not_zero(&stat_data->kvm->users_count))
4085 return -ENOENT;
4086
4087 if (simple_attr_open(inode, file, get,
4088 stat_data->mode & S_IWUGO ? set : NULL,
4089 fmt)) {
4090 kvm_put_kvm(stat_data->kvm);
4091 return -ENOMEM;
4092 }
4093
4094 return 0;
4095 }
4096
4097 static int kvm_debugfs_release(struct inode *inode, struct file *file)
4098 {
4099 struct kvm_stat_data *stat_data = (struct kvm_stat_data *)
4100 inode->i_private;
4101
4102 simple_attr_release(inode, file);
4103 kvm_put_kvm(stat_data->kvm);
4104
4105 return 0;
4106 }
4107
4108 static int vm_stat_get_per_vm(void *data, u64 *val)
4109 {
4110 struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
4111
4112 *val = *(ulong *)((void *)stat_data->kvm + stat_data->offset);
4113
4114 return 0;
4115 }
4116
4117 static int vm_stat_clear_per_vm(void *data, u64 val)
4118 {
4119 struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
4120
4121 if (val)
4122 return -EINVAL;
4123
4124 *(ulong *)((void *)stat_data->kvm + stat_data->offset) = 0;
4125
4126 return 0;
4127 }
4128
4129 static int vm_stat_get_per_vm_open(struct inode *inode, struct file *file)
4130 {
4131 __simple_attr_check_format("%llu\n", 0ull);
4132 return kvm_debugfs_open(inode, file, vm_stat_get_per_vm,
4133 vm_stat_clear_per_vm, "%llu\n");
4134 }
4135
4136 static const struct file_operations vm_stat_get_per_vm_fops = {
4137 .owner = THIS_MODULE,
4138 .open = vm_stat_get_per_vm_open,
4139 .release = kvm_debugfs_release,
4140 .read = simple_attr_read,
4141 .write = simple_attr_write,
4142 .llseek = no_llseek,
4143 };
4144
4145 static int vcpu_stat_get_per_vm(void *data, u64 *val)
4146 {
4147 int i;
4148 struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
4149 struct kvm_vcpu *vcpu;
4150
4151 *val = 0;
4152
4153 kvm_for_each_vcpu(i, vcpu, stat_data->kvm)
4154 *val += *(u64 *)((void *)vcpu + stat_data->offset);
4155
4156 return 0;
4157 }
4158
4159 static int vcpu_stat_clear_per_vm(void *data, u64 val)
4160 {
4161 int i;
4162 struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
4163 struct kvm_vcpu *vcpu;
4164
4165 if (val)
4166 return -EINVAL;
4167
4168 kvm_for_each_vcpu(i, vcpu, stat_data->kvm)
4169 *(u64 *)((void *)vcpu + stat_data->offset) = 0;
4170
4171 return 0;
4172 }
4173
4174 static int vcpu_stat_get_per_vm_open(struct inode *inode, struct file *file)
4175 {
4176 __simple_attr_check_format("%llu\n", 0ull);
4177 return kvm_debugfs_open(inode, file, vcpu_stat_get_per_vm,
4178 vcpu_stat_clear_per_vm, "%llu\n");
4179 }
4180
4181 static const struct file_operations vcpu_stat_get_per_vm_fops = {
4182 .owner = THIS_MODULE,
4183 .open = vcpu_stat_get_per_vm_open,
4184 .release = kvm_debugfs_release,
4185 .read = simple_attr_read,
4186 .write = simple_attr_write,
4187 .llseek = no_llseek,
4188 };
4189
4190 static const struct file_operations *stat_fops_per_vm[] = {
4191 [KVM_STAT_VCPU] = &vcpu_stat_get_per_vm_fops,
4192 [KVM_STAT_VM] = &vm_stat_get_per_vm_fops,
4193 };
4194
4195 static int vm_stat_get(void *_offset, u64 *val)
4196 {
4197 unsigned offset = (long)_offset;
4198 struct kvm *kvm;
4199 struct kvm_stat_data stat_tmp = {.offset = offset};
4200 u64 tmp_val;
4201
4202 *val = 0;
4203 mutex_lock(&kvm_lock);
4204 list_for_each_entry(kvm, &vm_list, vm_list) {
4205 stat_tmp.kvm = kvm;
4206 vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
4207 *val += tmp_val;
4208 }
4209 mutex_unlock(&kvm_lock);
4210 return 0;
4211 }
4212
4213 static int vm_stat_clear(void *_offset, u64 val)
4214 {
4215 unsigned offset = (long)_offset;
4216 struct kvm *kvm;
4217 struct kvm_stat_data stat_tmp = {.offset = offset};
4218
4219 if (val)
4220 return -EINVAL;
4221
4222 mutex_lock(&kvm_lock);
4223 list_for_each_entry(kvm, &vm_list, vm_list) {
4224 stat_tmp.kvm = kvm;
4225 vm_stat_clear_per_vm((void *)&stat_tmp, 0);
4226 }
4227 mutex_unlock(&kvm_lock);
4228
4229 return 0;
4230 }
4231
4232 DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, vm_stat_clear, "%llu\n");
4233
4234 static int vcpu_stat_get(void *_offset, u64 *val)
4235 {
4236 unsigned offset = (long)_offset;
4237 struct kvm *kvm;
4238 struct kvm_stat_data stat_tmp = {.offset = offset};
4239 u64 tmp_val;
4240
4241 *val = 0;
4242 mutex_lock(&kvm_lock);
4243 list_for_each_entry(kvm, &vm_list, vm_list) {
4244 stat_tmp.kvm = kvm;
4245 vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
4246 *val += tmp_val;
4247 }
4248 mutex_unlock(&kvm_lock);
4249 return 0;
4250 }
4251
4252 static int vcpu_stat_clear(void *_offset, u64 val)
4253 {
4254 unsigned offset = (long)_offset;
4255 struct kvm *kvm;
4256 struct kvm_stat_data stat_tmp = {.offset = offset};
4257
4258 if (val)
4259 return -EINVAL;
4260
4261 mutex_lock(&kvm_lock);
4262 list_for_each_entry(kvm, &vm_list, vm_list) {
4263 stat_tmp.kvm = kvm;
4264 vcpu_stat_clear_per_vm((void *)&stat_tmp, 0);
4265 }
4266 mutex_unlock(&kvm_lock);
4267
4268 return 0;
4269 }
4270
4271 DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, vcpu_stat_clear,
4272 "%llu\n");
4273
4274 static const struct file_operations *stat_fops[] = {
4275 [KVM_STAT_VCPU] = &vcpu_stat_fops,
4276 [KVM_STAT_VM] = &vm_stat_fops,
4277 };
4278
4279 static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
4280 {
4281 struct kobj_uevent_env *env;
4282 unsigned long long created, active;
4283
4284 if (!kvm_dev.this_device || !kvm)
4285 return;
4286
4287 mutex_lock(&kvm_lock);
4288 if (type == KVM_EVENT_CREATE_VM) {
4289 kvm_createvm_count++;
4290 kvm_active_vms++;
4291 } else if (type == KVM_EVENT_DESTROY_VM) {
4292 kvm_active_vms--;
4293 }
4294 created = kvm_createvm_count;
4295 active = kvm_active_vms;
4296 mutex_unlock(&kvm_lock);
4297
4298 env = kzalloc(sizeof(*env), GFP_KERNEL_ACCOUNT);
4299 if (!env)
4300 return;
4301
4302 add_uevent_var(env, "CREATED=%llu", created);
4303 add_uevent_var(env, "COUNT=%llu", active);
4304
4305 if (type == KVM_EVENT_CREATE_VM) {
4306 add_uevent_var(env, "EVENT=create");
4307 kvm->userspace_pid = task_pid_nr(current);
4308 } else if (type == KVM_EVENT_DESTROY_VM) {
4309 add_uevent_var(env, "EVENT=destroy");
4310 }
4311 add_uevent_var(env, "PID=%d", kvm->userspace_pid);
4312
4313 if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) {
4314 char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT);
4315
4316 if (p) {
4317 tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX);
4318 if (!IS_ERR(tmp))
4319 add_uevent_var(env, "STATS_PATH=%s", tmp);
4320 kfree(p);
4321 }
4322 }
4323
4324 env->envp[env->envp_idx++] = NULL;
4325 kobject_uevent_env(&kvm_dev.this_device->kobj, KOBJ_CHANGE, env->envp);
4326 kfree(env);
4327 }
4328
4329 static void kvm_init_debug(void)
4330 {
4331 struct kvm_stats_debugfs_item *p;
4332
4333 kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);
4334
4335 kvm_debugfs_num_entries = 0;
4336 for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) {
4337 int mode = p->mode ? p->mode : 0644;
4338 debugfs_create_file(p->name, mode, kvm_debugfs_dir,
4339 (void *)(long)p->offset,
4340 stat_fops[p->kind]);
4341 }
4342 }
4343
4344 static int kvm_suspend(void)
4345 {
4346 if (kvm_usage_count)
4347 hardware_disable_nolock(NULL);
4348 return 0;
4349 }
4350
4351 static void kvm_resume(void)
4352 {
4353 if (kvm_usage_count) {
4354 #ifdef CONFIG_LOCKDEP
4355 WARN_ON(lockdep_is_held(&kvm_count_lock));
4356 #endif
4357 hardware_enable_nolock(NULL);
4358 }
4359 }
4360
4361 static struct syscore_ops kvm_syscore_ops = {
4362 .suspend = kvm_suspend,
4363 .resume = kvm_resume,
4364 };
4365
4366 static inline
4367 struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
4368 {
4369 return container_of(pn, struct kvm_vcpu, preempt_notifier);
4370 }
4371
4372 static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
4373 {
4374 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
4375
4376 WRITE_ONCE(vcpu->preempted, false);
4377 WRITE_ONCE(vcpu->ready, false);
4378
4379 kvm_arch_sched_in(vcpu, cpu);
4380
4381 kvm_arch_vcpu_load(vcpu, cpu);
4382 }
4383
4384 static void kvm_sched_out(struct preempt_notifier *pn,
4385 struct task_struct *next)
4386 {
4387 struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
4388
4389 if (current->state == TASK_RUNNING) {
4390 WRITE_ONCE(vcpu->preempted, true);
4391 WRITE_ONCE(vcpu->ready, true);
4392 }
4393 kvm_arch_vcpu_put(vcpu);
4394 }
4395
4396 static void check_processor_compat(void *rtn)
4397 {
4398 *(int *)rtn = kvm_arch_check_processor_compat();
4399 }
4400
4401 int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
4402 struct module *module)
4403 {
4404 int r;
4405 int cpu;
4406
4407 r = kvm_arch_init(opaque);
4408 if (r)
4409 goto out_fail;
4410
4411
4412
4413
4414
4415
4416
4417
4418 r = kvm_irqfd_init();
4419 if (r)
4420 goto out_irqfd;
4421
4422 if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
4423 r = -ENOMEM;
4424 goto out_free_0;
4425 }
4426
4427 r = kvm_arch_hardware_setup();
4428 if (r < 0)
4429 goto out_free_0a;
4430
4431 for_each_online_cpu(cpu) {
4432 smp_call_function_single(cpu, check_processor_compat, &r, 1);
4433 if (r < 0)
4434 goto out_free_1;
4435 }
4436
4437 r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_STARTING, "kvm/cpu:starting",
4438 kvm_starting_cpu, kvm_dying_cpu);
4439 if (r)
4440 goto out_free_2;
4441 register_reboot_notifier(&kvm_reboot_notifier);
4442
4443
4444 if (!vcpu_align)
4445 vcpu_align = __alignof__(struct kvm_vcpu);
4446 kvm_vcpu_cache =
4447 kmem_cache_create_usercopy("kvm_vcpu", vcpu_size, vcpu_align,
4448 SLAB_ACCOUNT,
4449 offsetof(struct kvm_vcpu, arch),
4450 sizeof_field(struct kvm_vcpu, arch),
4451 NULL);
4452 if (!kvm_vcpu_cache) {
4453 r = -ENOMEM;
4454 goto out_free_3;
4455 }
4456
4457 r = kvm_async_pf_init();
4458 if (r)
4459 goto out_free;
4460
4461 kvm_chardev_ops.owner = module;
4462 kvm_vm_fops.owner = module;
4463 kvm_vcpu_fops.owner = module;
4464
4465 r = misc_register(&kvm_dev);
4466 if (r) {
4467 pr_err("kvm: misc device register failed\n");
4468 goto out_unreg;
4469 }
4470
4471 register_syscore_ops(&kvm_syscore_ops);
4472
4473 kvm_preempt_ops.sched_in = kvm_sched_in;
4474 kvm_preempt_ops.sched_out = kvm_sched_out;
4475
4476 kvm_init_debug();
4477
4478 r = kvm_vfio_ops_init();
4479 WARN_ON(r);
4480
4481 return 0;
4482
4483 out_unreg:
4484 kvm_async_pf_deinit();
4485 out_free:
4486 kmem_cache_destroy(kvm_vcpu_cache);
4487 out_free_3:
4488 unregister_reboot_notifier(&kvm_reboot_notifier);
4489 cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING);
4490 out_free_2:
4491 out_free_1:
4492 kvm_arch_hardware_unsetup();
4493 out_free_0a:
4494 free_cpumask_var(cpus_hardware_enabled);
4495 out_free_0:
4496 kvm_irqfd_exit();
4497 out_irqfd:
4498 kvm_arch_exit();
4499 out_fail:
4500 return r;
4501 }
4502 EXPORT_SYMBOL_GPL(kvm_init);
4503
4504 void kvm_exit(void)
4505 {
4506 debugfs_remove_recursive(kvm_debugfs_dir);
4507 misc_deregister(&kvm_dev);
4508 kmem_cache_destroy(kvm_vcpu_cache);
4509 kvm_async_pf_deinit();
4510 unregister_syscore_ops(&kvm_syscore_ops);
4511 unregister_reboot_notifier(&kvm_reboot_notifier);
4512 cpuhp_remove_state_nocalls(CPUHP_AP_KVM_STARTING);
4513 on_each_cpu(hardware_disable_nolock, NULL, 1);
4514 kvm_arch_hardware_unsetup();
4515 kvm_arch_exit();
4516 kvm_irqfd_exit();
4517 free_cpumask_var(cpus_hardware_enabled);
4518 kvm_vfio_ops_exit();
4519 }
4520 EXPORT_SYMBOL_GPL(kvm_exit);
4521
4522 struct kvm_vm_worker_thread_context {
4523 struct kvm *kvm;
4524 struct task_struct *parent;
4525 struct completion init_done;
4526 kvm_vm_thread_fn_t thread_fn;
4527 uintptr_t data;
4528 int err;
4529 };
4530
4531 static int kvm_vm_worker_thread(void *context)
4532 {
4533
4534
4535
4536
4537 struct kvm_vm_worker_thread_context *init_context = context;
4538 struct kvm *kvm = init_context->kvm;
4539 kvm_vm_thread_fn_t thread_fn = init_context->thread_fn;
4540 uintptr_t data = init_context->data;
4541 int err;
4542
4543 err = kthread_park(current);
4544
4545 WARN_ON(err != 0);
4546 if (err)
4547 goto init_complete;
4548
4549 err = cgroup_attach_task_all(init_context->parent, current);
4550 if (err) {
4551 kvm_err("%s: cgroup_attach_task_all failed with err %d\n",
4552 __func__, err);
4553 goto init_complete;
4554 }
4555
4556 set_user_nice(current, task_nice(init_context->parent));
4557
4558 init_complete:
4559 init_context->err = err;
4560 complete(&init_context->init_done);
4561 init_context = NULL;
4562
4563 if (err)
4564 return err;
4565
4566
4567 kthread_parkme();
4568
4569 if (!kthread_should_stop())
4570 err = thread_fn(kvm, data);
4571
4572 return err;
4573 }
4574
4575 int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
4576 uintptr_t data, const char *name,
4577 struct task_struct **thread_ptr)
4578 {
4579 struct kvm_vm_worker_thread_context init_context = {};
4580 struct task_struct *thread;
4581
4582 *thread_ptr = NULL;
4583 init_context.kvm = kvm;
4584 init_context.parent = current;
4585 init_context.thread_fn = thread_fn;
4586 init_context.data = data;
4587 init_completion(&init_context.init_done);
4588
4589 thread = kthread_run(kvm_vm_worker_thread, &init_context,
4590 "%s-%d", name, task_pid_nr(current));
4591 if (IS_ERR(thread))
4592 return PTR_ERR(thread);
4593
4594
4595 WARN_ON(thread == NULL);
4596
4597 wait_for_completion(&init_context.init_done);
4598
4599 if (!init_context.err)
4600 *thread_ptr = thread;
4601
4602 return init_context.err;
4603 }
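/*
 * Illustration (not part of this file): a hedged sketch of how an arch or
 * subsystem might use kvm_vm_create_worker_thread() above.  The worker body,
 * its wake-up period and the place the task pointer is stashed are
 * assumptions for the sketch only; the thread is later stopped with
 * kthread_stop(*worker).
 */
static int demo_worker_fn(struct kvm *kvm, uintptr_t data)
{
	while (!kthread_should_stop()) {
		/* periodic per-VM housekeeping would go here */
		schedule_timeout_interruptible(HZ);
	}
	return 0;
}

static int demo_start_worker(struct kvm *kvm, struct task_struct **worker)
{
	/* The new thread inherits current's cgroups and nice value (see above). */
	return kvm_vm_create_worker_thread(kvm, demo_worker_fn, 0,
					   "demo-worker", worker);
}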