This source file includes the following definitions:
- kvm_async_pf_hash_reset
- kvm_on_user_return
- shared_msr_update
- kvm_define_shared_msr
- kvm_shared_msr_cpu_online
- kvm_set_shared_msr
- drop_user_return_notifiers
- kvm_get_apic_base
- kvm_get_apic_mode
- kvm_set_apic_base
- kvm_spurious_fault
- exception_class
- exception_type
- kvm_deliver_exception_payload
- kvm_multiple_exception
- kvm_queue_exception
- kvm_requeue_exception
- kvm_queue_exception_p
- kvm_queue_exception_e_p
- kvm_complete_insn_gp
- kvm_inject_page_fault
- kvm_propagate_fault
- kvm_inject_nmi
- kvm_queue_exception_e
- kvm_requeue_exception_e
- kvm_require_cpl
- kvm_require_dr
- kvm_read_guest_page_mmu
- kvm_read_nested_guest_page
- pdptr_rsvd_bits
- load_pdptrs
- pdptrs_changed
- kvm_set_cr0
- kvm_lmsw
- kvm_load_guest_xcr0
- kvm_put_guest_xcr0
- __kvm_set_xcr
- kvm_set_xcr
- kvm_host_cr4_reserved_bits
- kvm_valid_cr4
- kvm_set_cr4
- kvm_set_cr3
- kvm_set_cr8
- kvm_get_cr8
- kvm_update_dr0123
- kvm_update_dr6
- kvm_update_dr7
- kvm_dr6_fixed
- __kvm_set_dr
- kvm_set_dr
- kvm_get_dr
- kvm_rdpmc
- kvm_get_arch_capabilities
- kvm_get_msr_feature
- do_get_msr_feature
- __kvm_valid_efer
- kvm_valid_efer
- set_efer
- kvm_enable_efer_bits
- __kvm_set_msr
- __kvm_get_msr
- kvm_get_msr
- kvm_set_msr
- kvm_emulate_rdmsr
- kvm_emulate_wrmsr
- do_get_msr
- do_set_msr
- update_pvclock_gtod
- kvm_set_pending_timer
- kvm_write_wall_clock
- div_frac
- kvm_get_time_scale
- adjust_tsc_khz
- set_tsc_khz
- kvm_set_tsc_khz
- compute_guest_tsc
- gtod_is_based_on_tsc
- kvm_track_tsc_matching
- update_ia32_tsc_adjust_msr
- __scale_tsc
- kvm_scale_tsc
- kvm_compute_tsc_offset
- kvm_read_l1_tsc
- kvm_vcpu_write_tsc_offset
- kvm_check_tsc_unstable
- kvm_write_tsc
- adjust_tsc_offset_guest
- adjust_tsc_offset_host
- read_tsc
- vgettsc
- do_monotonic_boot
- do_realtime
- kvm_get_time_and_clockread
- kvm_get_walltime_and_clockread
- pvclock_update_vm_gtod_copy
- kvm_make_mclock_inprogress_request
- kvm_gen_update_masterclock
- get_kvmclock_ns
- kvm_setup_pvclock_page
- kvm_guest_time_update
- kvmclock_update_fn
- kvm_gen_kvmclock_update
- kvmclock_sync_fn
- can_set_mci_status
- set_msr_mce
- xen_hvm_config
- kvm_pv_enable_async_pf
- kvmclock_reset
- kvm_vcpu_flush_tlb
- record_steal_time
- kvm_set_msr_common
- get_msr_mce
- kvm_get_msr_common
- __msr_io
- msr_io
- kvm_can_mwait_in_guest
- kvm_vm_ioctl_check_extension
- kvm_arch_dev_ioctl
- wbinvd_ipi
- need_emulate_wbinvd
- kvm_arch_vcpu_load
- kvm_steal_time_set_preempted
- kvm_arch_vcpu_put
- kvm_vcpu_ioctl_get_lapic
- kvm_vcpu_ioctl_set_lapic
- kvm_cpu_accept_dm_intr
- kvm_vcpu_ready_for_interrupt_injection
- kvm_vcpu_ioctl_interrupt
- kvm_vcpu_ioctl_nmi
- kvm_vcpu_ioctl_smi
- vcpu_ioctl_tpr_access_reporting
- kvm_vcpu_ioctl_x86_setup_mce
- kvm_vcpu_ioctl_x86_set_mce
- kvm_vcpu_ioctl_x86_get_vcpu_events
- kvm_vcpu_ioctl_x86_set_vcpu_events
- kvm_vcpu_ioctl_x86_get_debugregs
- kvm_vcpu_ioctl_x86_set_debugregs
- fill_xsave
- load_xsave
- kvm_vcpu_ioctl_x86_get_xsave
- kvm_vcpu_ioctl_x86_set_xsave
- kvm_vcpu_ioctl_x86_get_xcrs
- kvm_vcpu_ioctl_x86_set_xcrs
- kvm_set_guest_paused
- kvm_vcpu_ioctl_enable_cap
- kvm_arch_vcpu_ioctl
- kvm_arch_vcpu_fault
- kvm_vm_ioctl_set_tss_addr
- kvm_vm_ioctl_set_identity_map_addr
- kvm_vm_ioctl_set_nr_mmu_pages
- kvm_vm_ioctl_get_nr_mmu_pages
- kvm_vm_ioctl_get_irqchip
- kvm_vm_ioctl_set_irqchip
- kvm_vm_ioctl_get_pit
- kvm_vm_ioctl_set_pit
- kvm_vm_ioctl_get_pit2
- kvm_vm_ioctl_set_pit2
- kvm_vm_ioctl_reinject
- kvm_vm_ioctl_get_dirty_log
- kvm_vm_ioctl_clear_dirty_log
- kvm_vm_ioctl_irq_line
- kvm_vm_ioctl_enable_cap
- kvm_arch_vm_ioctl
- kvm_init_msr_list
- vcpu_mmio_write
- vcpu_mmio_read
- kvm_set_segment
- kvm_get_segment
- translate_nested_gpa
- kvm_mmu_gva_to_gpa_read
- kvm_mmu_gva_to_gpa_fetch
- kvm_mmu_gva_to_gpa_write
- kvm_mmu_gva_to_gpa_system
- kvm_read_guest_virt_helper
- kvm_fetch_guest_virt
- kvm_read_guest_virt
- emulator_read_std
- kvm_read_guest_phys_system
- kvm_write_guest_virt_helper
- emulator_write_std
- kvm_write_guest_virt_system
- handle_ud
- vcpu_is_mmio_gpa
- vcpu_mmio_gva_to_gpa
- emulator_write_phys
- read_prepare
- read_emulate
- write_emulate
- write_mmio
- read_exit_mmio
- write_exit_mmio
- emulator_read_write_onepage
- emulator_read_write
- emulator_read_emulated
- emulator_write_emulated
- emulator_cmpxchg_emulated
- kernel_pio
- emulator_pio_in_out
- emulator_pio_in_emulated
- emulator_pio_out_emulated
- get_segment_base
- emulator_invlpg
- kvm_emulate_wbinvd_noskip
- kvm_emulate_wbinvd
- emulator_wbinvd
- emulator_get_dr
- emulator_set_dr
- mk_cr_64
- emulator_get_cr
- emulator_set_cr
- emulator_get_cpl
- emulator_get_gdt
- emulator_get_idt
- emulator_set_gdt
- emulator_set_idt
- emulator_get_cached_segment_base
- emulator_get_segment
- emulator_set_segment
- emulator_get_msr
- emulator_set_msr
- emulator_get_smbase
- emulator_set_smbase
- emulator_check_pmc
- emulator_read_pmc
- emulator_halt
- emulator_intercept
- emulator_get_cpuid
- emulator_read_gpr
- emulator_write_gpr
- emulator_set_nmi_mask
- emulator_get_hflags
- emulator_set_hflags
- emulator_pre_leave_smm
- emulator_post_leave_smm
- emulator_set_xcr
- toggle_interruptibility
- inject_emulated_exception
- init_emulate_ctxt
- kvm_inject_realmode_interrupt
- handle_emulation_failure
- reexecute_instruction
- retry_instruction
- kvm_smm_changed
- kvm_vcpu_check_hw_bp
- kvm_vcpu_do_singlestep
- kvm_skip_emulated_instruction
- kvm_vcpu_check_breakpoint
- is_vmware_backdoor_opcode
- x86_emulate_instruction
- kvm_emulate_instruction
- kvm_emulate_instruction_from_buffer
- complete_fast_pio_out_port_0x7e
- complete_fast_pio_out
- kvm_fast_pio_out
- complete_fast_pio_in
- kvm_fast_pio_in
- kvm_fast_pio
- kvmclock_cpu_down_prep
- tsc_khz_changed
- kvm_hyperv_tsc_notifier
- __kvmclock_cpufreq_notifier
- kvmclock_cpufreq_notifier
- kvmclock_cpu_online
- kvm_timer_init
- kvm_is_in_guest
- kvm_is_user_mode
- kvm_get_guest_ip
- kvm_handle_intel_pt_intr
- pvclock_gtod_update_fn
- pvclock_gtod_notify
- kvm_arch_init
- kvm_arch_exit
- kvm_vcpu_halt
- kvm_emulate_halt
- kvm_pv_clock_pairing
- kvm_pv_kick_cpu_op
- kvm_vcpu_deactivate_apicv
- kvm_sched_yield
- kvm_emulate_hypercall
- emulator_fix_hypercall
- dm_request_for_irq_injection
- post_kvm_run_save
- update_cr8_intercept
- inject_pending_event
- process_nmi
- enter_smm_get_segment_flags
- enter_smm_save_seg_32
- enter_smm_save_seg_64
- enter_smm_save_state_32
- enter_smm_save_state_64
- enter_smm
- process_smi
- kvm_make_scan_ioapic_request
- vcpu_scan_ioapic
- vcpu_load_eoi_exitmap
- kvm_arch_mmu_notifier_invalidate_range
- kvm_vcpu_reload_apic_access_page
- __kvm_request_immediate_exit
- vcpu_enter_guest
- vcpu_block
- kvm_vcpu_running
- vcpu_run
- complete_emulated_io
- complete_emulated_pio
- complete_emulated_mmio
- kvm_save_current_fpu
- kvm_load_guest_fpu
- kvm_put_guest_fpu
- kvm_arch_vcpu_ioctl_run
- __get_regs
- kvm_arch_vcpu_ioctl_get_regs
- __set_regs
- kvm_arch_vcpu_ioctl_set_regs
- kvm_get_cs_db_l_bits
- __get_sregs
- kvm_arch_vcpu_ioctl_get_sregs
- kvm_arch_vcpu_ioctl_get_mpstate
- kvm_arch_vcpu_ioctl_set_mpstate
- kvm_task_switch
- kvm_valid_sregs
- __set_sregs
- kvm_arch_vcpu_ioctl_set_sregs
- kvm_arch_vcpu_ioctl_set_guest_debug
- kvm_arch_vcpu_ioctl_translate
- kvm_arch_vcpu_ioctl_get_fpu
- kvm_arch_vcpu_ioctl_set_fpu
- store_regs
- sync_regs
- fx_init
- kvm_arch_vcpu_free
- kvm_arch_vcpu_create
- kvm_arch_vcpu_setup
- kvm_arch_vcpu_postcreate
- kvm_arch_vcpu_destroy
- kvm_vcpu_reset
- kvm_vcpu_deliver_sipi_vector
- kvm_arch_hardware_enable
- kvm_arch_hardware_disable
- kvm_arch_hardware_setup
- kvm_arch_hardware_unsetup
- kvm_arch_check_processor_compat
- kvm_vcpu_is_reset_bsp
- kvm_vcpu_is_bsp
- kvm_arch_vcpu_init
- kvm_arch_vcpu_uninit
- kvm_arch_sched_in
- kvm_arch_init_vm
- kvm_arch_post_init_vm
- kvm_unload_vcpu_mmu
- kvm_free_vcpus
- kvm_arch_sync_events
- __x86_set_memory_region
- x86_set_memory_region
- kvm_arch_pre_destroy_vm
- kvm_arch_destroy_vm
- kvm_arch_free_memslot
- kvm_arch_create_memslot
- kvm_arch_memslots_updated
- kvm_arch_prepare_memory_region
- kvm_mmu_slot_apply_flags
- kvm_arch_commit_memory_region
- kvm_arch_flush_shadow_all
- kvm_arch_flush_shadow_memslot
- kvm_guest_apic_has_interrupt
- kvm_vcpu_has_events
- kvm_arch_vcpu_runnable
- kvm_arch_dy_runnable
- kvm_arch_vcpu_in_kernel
- kvm_arch_vcpu_should_kick
- kvm_arch_interrupt_allowed
- kvm_get_linear_rip
- kvm_is_linear_rip
- kvm_get_rflags
- __kvm_set_rflags
- kvm_set_rflags
- kvm_arch_async_page_ready
- kvm_async_pf_hash_fn
- kvm_async_pf_next_probe
- kvm_add_async_pf_gfn
- kvm_async_pf_gfn_slot
- kvm_find_async_pf_gfn
- kvm_del_async_pf_gfn
- apf_put_user
- apf_get_user
- kvm_can_deliver_async_pf
- kvm_can_do_async_pf
- kvm_arch_async_page_not_present
- kvm_arch_async_page_present
- kvm_arch_can_inject_async_page_present
- kvm_arch_start_assignment
- kvm_arch_end_assignment
- kvm_arch_has_assigned_device
- kvm_arch_register_noncoherent_dma
- kvm_arch_unregister_noncoherent_dma
- kvm_arch_has_noncoherent_dma
- kvm_arch_has_irq_bypass
- kvm_arch_irq_bypass_add_producer
- kvm_arch_irq_bypass_del_producer
- kvm_arch_update_irqfd_routing
- kvm_vector_hashing_enabled
- kvm_arch_no_poll
19 #include <linux/kvm_host.h>
20 #include "irq.h"
21 #include "mmu.h"
22 #include "i8254.h"
23 #include "tss.h"
24 #include "kvm_cache_regs.h"
25 #include "x86.h"
26 #include "cpuid.h"
27 #include "pmu.h"
28 #include "hyperv.h"
29
30 #include <linux/clocksource.h>
31 #include <linux/interrupt.h>
32 #include <linux/kvm.h>
33 #include <linux/fs.h>
34 #include <linux/vmalloc.h>
35 #include <linux/export.h>
36 #include <linux/moduleparam.h>
37 #include <linux/mman.h>
38 #include <linux/highmem.h>
39 #include <linux/iommu.h>
40 #include <linux/intel-iommu.h>
41 #include <linux/cpufreq.h>
42 #include <linux/user-return-notifier.h>
43 #include <linux/srcu.h>
44 #include <linux/slab.h>
45 #include <linux/perf_event.h>
46 #include <linux/uaccess.h>
47 #include <linux/hash.h>
48 #include <linux/pci.h>
49 #include <linux/timekeeper_internal.h>
50 #include <linux/pvclock_gtod.h>
51 #include <linux/kvm_irqfd.h>
52 #include <linux/irqbypass.h>
53 #include <linux/sched/stat.h>
54 #include <linux/sched/isolation.h>
55 #include <linux/mem_encrypt.h>
56
57 #include <trace/events/kvm.h>
58
59 #include <asm/debugreg.h>
60 #include <asm/msr.h>
61 #include <asm/desc.h>
62 #include <asm/mce.h>
63 #include <linux/kernel_stat.h>
64 #include <asm/fpu/internal.h>
65 #include <asm/pvclock.h>
66 #include <asm/div64.h>
67 #include <asm/irq_remapping.h>
68 #include <asm/mshyperv.h>
69 #include <asm/hypervisor.h>
70 #include <asm/intel_pt.h>
71 #include <clocksource/hyperv_timer.h>
72
73 #define CREATE_TRACE_POINTS
74 #include "trace.h"
75
76 #define MAX_IO_MSRS 256
77 #define KVM_MAX_MCE_BANKS 32
78 u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
79 EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);
80
81 #define emul_to_vcpu(ctxt) \
82 container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)
83
84
85
86
87
88 #ifdef CONFIG_X86_64
89 static
90 u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
91 #else
92 static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
93 #endif
94
95 static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
96
97 #define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__
98 #define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__
99
100 #define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
101 KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
102
103 static void update_cr8_intercept(struct kvm_vcpu *vcpu);
104 static void process_nmi(struct kvm_vcpu *vcpu);
105 static void enter_smm(struct kvm_vcpu *vcpu);
106 static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
107 static void store_regs(struct kvm_vcpu *vcpu);
108 static int sync_regs(struct kvm_vcpu *vcpu);
109
110 struct kvm_x86_ops *kvm_x86_ops __read_mostly;
111 EXPORT_SYMBOL_GPL(kvm_x86_ops);
112
113 static bool __read_mostly ignore_msrs = false;
114 module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
115
116 static bool __read_mostly report_ignored_msrs = true;
117 module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR);
118
119 unsigned int min_timer_period_us = 200;
120 module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
121
122 static bool __read_mostly kvmclock_periodic_sync = true;
123 module_param(kvmclock_periodic_sync, bool, S_IRUGO);
124
125 bool __read_mostly kvm_has_tsc_control;
126 EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
127 u32 __read_mostly kvm_max_guest_tsc_khz;
128 EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
129 u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits;
130 EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits);
131 u64 __read_mostly kvm_max_tsc_scaling_ratio;
132 EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio);
133 u64 __read_mostly kvm_default_tsc_scaling_ratio;
134 EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
135
136
137 static u32 __read_mostly tsc_tolerance_ppm = 250;
138 module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
139
140
141
142
143
144
145
146 static int __read_mostly lapic_timer_advance_ns = -1;
147 module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR);
148
149 static bool __read_mostly vector_hashing = true;
150 module_param(vector_hashing, bool, S_IRUGO);
151
152 bool __read_mostly enable_vmware_backdoor = false;
153 module_param(enable_vmware_backdoor, bool, S_IRUGO);
154 EXPORT_SYMBOL_GPL(enable_vmware_backdoor);
155
156 static bool __read_mostly force_emulation_prefix = false;
157 module_param(force_emulation_prefix, bool, S_IRUGO);
158
159 int __read_mostly pi_inject_timer = -1;
160 module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
161
162 #define KVM_NR_SHARED_MSRS 16
163
164 struct kvm_shared_msrs_global {
165 int nr;
166 u32 msrs[KVM_NR_SHARED_MSRS];
167 };
168
169 struct kvm_shared_msrs {
170 struct user_return_notifier urn;
171 bool registered;
172 struct kvm_shared_msr_values {
173 u64 host;
174 u64 curr;
175 } values[KVM_NR_SHARED_MSRS];
176 };
177
178 static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
179 static struct kvm_shared_msrs __percpu *shared_msrs;
180
181 struct kvm_stats_debugfs_item debugfs_entries[] = {
182 { "pf_fixed", VCPU_STAT(pf_fixed) },
183 { "pf_guest", VCPU_STAT(pf_guest) },
184 { "tlb_flush", VCPU_STAT(tlb_flush) },
185 { "invlpg", VCPU_STAT(invlpg) },
186 { "exits", VCPU_STAT(exits) },
187 { "io_exits", VCPU_STAT(io_exits) },
188 { "mmio_exits", VCPU_STAT(mmio_exits) },
189 { "signal_exits", VCPU_STAT(signal_exits) },
190 { "irq_window", VCPU_STAT(irq_window_exits) },
191 { "nmi_window", VCPU_STAT(nmi_window_exits) },
192 { "halt_exits", VCPU_STAT(halt_exits) },
193 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
194 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
195 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
196 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
197 { "hypercalls", VCPU_STAT(hypercalls) },
198 { "request_irq", VCPU_STAT(request_irq_exits) },
199 { "irq_exits", VCPU_STAT(irq_exits) },
200 { "host_state_reload", VCPU_STAT(host_state_reload) },
201 { "fpu_reload", VCPU_STAT(fpu_reload) },
202 { "insn_emulation", VCPU_STAT(insn_emulation) },
203 { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
204 { "irq_injections", VCPU_STAT(irq_injections) },
205 { "nmi_injections", VCPU_STAT(nmi_injections) },
206 { "req_event", VCPU_STAT(req_event) },
207 { "l1d_flush", VCPU_STAT(l1d_flush) },
208 { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
209 { "mmu_pte_write", VM_STAT(mmu_pte_write) },
210 { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
211 { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
212 { "mmu_flooded", VM_STAT(mmu_flooded) },
213 { "mmu_recycled", VM_STAT(mmu_recycled) },
214 { "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
215 { "mmu_unsync", VM_STAT(mmu_unsync) },
216 { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
217 { "largepages", VM_STAT(lpages, .mode = 0444) },
218 { "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) },
219 { "max_mmu_page_hash_collisions",
220 VM_STAT(max_mmu_page_hash_collisions) },
221 { NULL }
222 };
223
224 u64 __read_mostly host_xcr0;
225
226 struct kmem_cache *x86_fpu_cache;
227 EXPORT_SYMBOL_GPL(x86_fpu_cache);
228
229 static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
230
231 static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
232 {
233 int i;
234 for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
235 vcpu->arch.apf.gfns[i] = ~0;
236 }
237
238 static void kvm_on_user_return(struct user_return_notifier *urn)
239 {
240 unsigned slot;
241 struct kvm_shared_msrs *locals
242 = container_of(urn, struct kvm_shared_msrs, urn);
243 struct kvm_shared_msr_values *values;
244 unsigned long flags;
245
246
247
248
249
250 local_irq_save(flags);
251 if (locals->registered) {
252 locals->registered = false;
253 user_return_notifier_unregister(urn);
254 }
255 local_irq_restore(flags);
256 for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
257 values = &locals->values[slot];
258 if (values->host != values->curr) {
259 wrmsrl(shared_msrs_global.msrs[slot], values->host);
260 values->curr = values->host;
261 }
262 }
263 }
264
265 static void shared_msr_update(unsigned slot, u32 msr)
266 {
267 u64 value;
268 unsigned int cpu = smp_processor_id();
269 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
270
271
272
273 if (slot >= shared_msrs_global.nr) {
274 printk(KERN_ERR "kvm: invalid MSR slot!");
275 return;
276 }
277 rdmsrl_safe(msr, &value);
278 smsr->values[slot].host = value;
279 smsr->values[slot].curr = value;
280 }
281
282 void kvm_define_shared_msr(unsigned slot, u32 msr)
283 {
284 BUG_ON(slot >= KVM_NR_SHARED_MSRS);
285 shared_msrs_global.msrs[slot] = msr;
286 if (slot >= shared_msrs_global.nr)
287 shared_msrs_global.nr = slot + 1;
288 }
289 EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
290
291 static void kvm_shared_msr_cpu_online(void)
292 {
293 unsigned i;
294
295 for (i = 0; i < shared_msrs_global.nr; ++i)
296 shared_msr_update(i, shared_msrs_global.msrs[i]);
297 }
298
299 int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
300 {
301 unsigned int cpu = smp_processor_id();
302 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
303 int err;
304
305 value = (value & mask) | (smsr->values[slot].host & ~mask);
306 if (value == smsr->values[slot].curr)
307 return 0;
308 err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
309 if (err)
310 return 1;
311
312 smsr->values[slot].curr = value;
313 if (!smsr->registered) {
314 smsr->urn.on_user_return = kvm_on_user_return;
315 user_return_notifier_register(&smsr->urn);
316 smsr->registered = true;
317 }
318 return 0;
319 }
320 EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
321
322 static void drop_user_return_notifiers(void)
323 {
324 unsigned int cpu = smp_processor_id();
325 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
326
327 if (smsr->registered)
328 kvm_on_user_return(&smsr->urn);
329 }
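/*
 * Note on the shared-MSR machinery above: vendor code registers a small
 * set of MSRs via kvm_define_shared_msr() (typically the SYSCALL-family
 * MSRs) whose host values only matter once the CPU returns to userspace.
 * kvm_set_shared_msr() loads the guest value and arms a user-return
 * notifier; kvm_on_user_return() then restores the host values when the
 * CPU returns to userspace, instead of doing a wrmsr on every vmexit.
 */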
330
331 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
332 {
333 return vcpu->arch.apic_base;
334 }
335 EXPORT_SYMBOL_GPL(kvm_get_apic_base);
336
337 enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu)
338 {
339 return kvm_apic_mode(kvm_get_apic_base(vcpu));
340 }
341 EXPORT_SYMBOL_GPL(kvm_get_apic_mode);
342
343 int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
344 {
345 enum lapic_mode old_mode = kvm_get_apic_mode(vcpu);
346 enum lapic_mode new_mode = kvm_apic_mode(msr_info->data);
347 u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | 0x2ff |
348 (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
349
350 if ((msr_info->data & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID)
351 return 1;
352 if (!msr_info->host_initiated) {
353 if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC)
354 return 1;
355 if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC)
356 return 1;
357 }
358
359 kvm_lapic_set_base(vcpu, msr_info->data);
360 return 0;
361 }
362 EXPORT_SYMBOL_GPL(kvm_set_apic_base);
363
364 asmlinkage __visible void kvm_spurious_fault(void)
365 {
366
367 BUG_ON(!kvm_rebooting);
368 }
369 EXPORT_SYMBOL_GPL(kvm_spurious_fault);
370
371 #define EXCPT_BENIGN 0
372 #define EXCPT_CONTRIBUTORY 1
373 #define EXCPT_PF 2
374
375 static int exception_class(int vector)
376 {
377 switch (vector) {
378 case PF_VECTOR:
379 return EXCPT_PF;
380 case DE_VECTOR:
381 case TS_VECTOR:
382 case NP_VECTOR:
383 case SS_VECTOR:
384 case GP_VECTOR:
385 return EXCPT_CONTRIBUTORY;
386 default:
387 break;
388 }
389 return EXCPT_BENIGN;
390 }
391
392 #define EXCPT_FAULT 0
393 #define EXCPT_TRAP 1
394 #define EXCPT_ABORT 2
395 #define EXCPT_INTERRUPT 3
396
397 static int exception_type(int vector)
398 {
399 unsigned int mask;
400
401 if (WARN_ON(vector > 31 || vector == NMI_VECTOR))
402 return EXCPT_INTERRUPT;
403
404 mask = 1 << vector;
405
406
407 if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR)))
408 return EXCPT_TRAP;
409
410 if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR)))
411 return EXCPT_ABORT;
412
413
414 return EXCPT_FAULT;
415 }
416
417 void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
418 {
419 unsigned nr = vcpu->arch.exception.nr;
420 bool has_payload = vcpu->arch.exception.has_payload;
421 unsigned long payload = vcpu->arch.exception.payload;
422
423 if (!has_payload)
424 return;
425
426 switch (nr) {
427 case DB_VECTOR:
428
429
430
431
432
433 vcpu->arch.dr6 &= ~DR_TRAP_BITS;
434
435
436
437 vcpu->arch.dr6 |= DR6_RTM;
438 vcpu->arch.dr6 |= payload;
439
440
441
442
443
444
445
446
447 vcpu->arch.dr6 ^= payload & DR6_RTM;
448
449
450
451
452
453
454
455 vcpu->arch.dr6 &= ~BIT(12);
456 break;
457 case PF_VECTOR:
458 vcpu->arch.cr2 = payload;
459 break;
460 }
461
462 vcpu->arch.exception.has_payload = false;
463 vcpu->arch.exception.payload = 0;
464 }
465 EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload);
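/*
 * As the switch above shows, an exception "payload" is the state the CPU
 * would normally update as part of event delivery: for #DB it is folded
 * into DR6 (minus bit 12, which exists only in the VMCS "pending debug
 * exceptions" field), and for #PF it becomes CR2.  Delivery of the
 * payload is deferred (see kvm_multiple_exception() below) when
 * exception_payload_enabled is set and the vCPU is in guest mode, so
 * DR6/CR2 are not clobbered if L1 intercepts the event.
 */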
466
467 static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
468 unsigned nr, bool has_error, u32 error_code,
469 bool has_payload, unsigned long payload, bool reinject)
470 {
471 u32 prev_nr;
472 int class1, class2;
473
474 kvm_make_request(KVM_REQ_EVENT, vcpu);
475
476 if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
477 queue:
478 if (has_error && !is_protmode(vcpu))
479 has_error = false;
480 if (reinject) {
481
482
483
484
485
486
487
488
489 WARN_ON_ONCE(vcpu->arch.exception.pending);
490 vcpu->arch.exception.injected = true;
491 if (WARN_ON_ONCE(has_payload)) {
492
493
494
495
496 has_payload = false;
497 payload = 0;
498 }
499 } else {
500 vcpu->arch.exception.pending = true;
501 vcpu->arch.exception.injected = false;
502 }
503 vcpu->arch.exception.has_error_code = has_error;
504 vcpu->arch.exception.nr = nr;
505 vcpu->arch.exception.error_code = error_code;
506 vcpu->arch.exception.has_payload = has_payload;
507 vcpu->arch.exception.payload = payload;
508
509
510
511
512
513
514
515
516
517
518
519 if (!vcpu->kvm->arch.exception_payload_enabled ||
520 !is_guest_mode(vcpu))
521 kvm_deliver_exception_payload(vcpu);
522 return;
523 }
524
525
526 prev_nr = vcpu->arch.exception.nr;
527 if (prev_nr == DF_VECTOR) {
528
529 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
530 return;
531 }
532 class1 = exception_class(prev_nr);
533 class2 = exception_class(nr);
534 if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
535 || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
536
537
538
539
540
541 vcpu->arch.exception.pending = true;
542 vcpu->arch.exception.injected = false;
543 vcpu->arch.exception.has_error_code = true;
544 vcpu->arch.exception.nr = DF_VECTOR;
545 vcpu->arch.exception.error_code = 0;
546 vcpu->arch.exception.has_payload = false;
547 vcpu->arch.exception.payload = 0;
548 } else
549
550
551
552 goto queue;
553 }
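/*
 * The classing logic above implements the architectural double-fault
 * rules: raising a contributory exception while another contributory
 * exception is pending, or any non-benign exception while a page fault
 * is pending, is promoted to #DF with error code 0; if the pending
 * exception is already #DF, KVM requests a triple fault instead
 * (KVM_REQ_TRIPLE_FAULT).
 */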
554
555 void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
556 {
557 kvm_multiple_exception(vcpu, nr, false, 0, false, 0, false);
558 }
559 EXPORT_SYMBOL_GPL(kvm_queue_exception);
560
561 void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
562 {
563 kvm_multiple_exception(vcpu, nr, false, 0, false, 0, true);
564 }
565 EXPORT_SYMBOL_GPL(kvm_requeue_exception);
566
567 static void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr,
568 unsigned long payload)
569 {
570 kvm_multiple_exception(vcpu, nr, false, 0, true, payload, false);
571 }
572
573 static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr,
574 u32 error_code, unsigned long payload)
575 {
576 kvm_multiple_exception(vcpu, nr, true, error_code,
577 true, payload, false);
578 }
579
580 int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
581 {
582 if (err)
583 kvm_inject_gp(vcpu, 0);
584 else
585 return kvm_skip_emulated_instruction(vcpu);
586
587 return 1;
588 }
589 EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
590
591 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
592 {
593 ++vcpu->stat.pf_guest;
594 vcpu->arch.exception.nested_apf =
595 is_guest_mode(vcpu) && fault->async_page_fault;
596 if (vcpu->arch.exception.nested_apf) {
597 vcpu->arch.apf.nested_apf_token = fault->address;
598 kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
599 } else {
600 kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code,
601 fault->address);
602 }
603 }
604 EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
605
606 static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
607 {
608 if (mmu_is_nested(vcpu) && !fault->nested_page_fault)
609 vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault);
610 else
611 vcpu->arch.mmu->inject_page_fault(vcpu, fault);
612
613 return fault->nested_page_fault;
614 }
615
616 void kvm_inject_nmi(struct kvm_vcpu *vcpu)
617 {
618 atomic_inc(&vcpu->arch.nmi_queued);
619 kvm_make_request(KVM_REQ_NMI, vcpu);
620 }
621 EXPORT_SYMBOL_GPL(kvm_inject_nmi);
622
623 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
624 {
625 kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, false);
626 }
627 EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
628
629 void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
630 {
631 kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, true);
632 }
633 EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
634
635
636
637
638
639 bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
640 {
641 if (kvm_x86_ops->get_cpl(vcpu) <= required_cpl)
642 return true;
643 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
644 return false;
645 }
646 EXPORT_SYMBOL_GPL(kvm_require_cpl);
647
648 bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
649 {
650 if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE))
651 return true;
652
653 kvm_queue_exception(vcpu, UD_VECTOR);
654 return false;
655 }
656 EXPORT_SYMBOL_GPL(kvm_require_dr);
657
658
659
660
661
662
663 int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
664 gfn_t ngfn, void *data, int offset, int len,
665 u32 access)
666 {
667 struct x86_exception exception;
668 gfn_t real_gfn;
669 gpa_t ngpa;
670
671 ngpa = gfn_to_gpa(ngfn);
672 real_gfn = mmu->translate_gpa(vcpu, ngpa, access, &exception);
673 if (real_gfn == UNMAPPED_GVA)
674 return -EFAULT;
675
676 real_gfn = gpa_to_gfn(real_gfn);
677
678 return kvm_vcpu_read_guest_page(vcpu, real_gfn, data, offset, len);
679 }
680 EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);
681
682 static int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
683 void *data, int offset, int len, u32 access)
684 {
685 return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn,
686 data, offset, len, access);
687 }
688
689 static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu)
690 {
691 return rsvd_bits(cpuid_maxphyaddr(vcpu), 63) | rsvd_bits(5, 8) |
692 rsvd_bits(1, 2);
693 }
694
695
696
697
698 int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
699 {
700 gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
701 unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
702 int i;
703 int ret;
704 u64 pdpte[ARRAY_SIZE(mmu->pdptrs)];
705
706 ret = kvm_read_guest_page_mmu(vcpu, mmu, pdpt_gfn, pdpte,
707 offset * sizeof(u64), sizeof(pdpte),
708 PFERR_USER_MASK|PFERR_WRITE_MASK);
709 if (ret < 0) {
710 ret = 0;
711 goto out;
712 }
713 for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
714 if ((pdpte[i] & PT_PRESENT_MASK) &&
715 (pdpte[i] & pdptr_rsvd_bits(vcpu))) {
716 ret = 0;
717 goto out;
718 }
719 }
720 ret = 1;
721
722 memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
723 __set_bit(VCPU_EXREG_PDPTR,
724 (unsigned long *)&vcpu->arch.regs_avail);
725 __set_bit(VCPU_EXREG_PDPTR,
726 (unsigned long *)&vcpu->arch.regs_dirty);
727 out:
728
729 return ret;
730 }
731 EXPORT_SYMBOL_GPL(load_pdptrs);
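/*
 * In PAE paging mode CR3 points at four page-directory-pointer entries
 * rather than a page-table root; load_pdptrs() reads them through the
 * given MMU (so nested guest physical addresses are translated first),
 * rejects present entries with reserved bits set, and caches the result
 * in mmu->pdptrs, marking VCPU_EXREG_PDPTR both available and dirty.
 * It returns 1 on success and 0 if the PDPTEs could not be read or are
 * invalid.
 */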
732
733 bool pdptrs_changed(struct kvm_vcpu *vcpu)
734 {
735 u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
736 bool changed = true;
737 int offset;
738 gfn_t gfn;
739 int r;
740
741 if (!is_pae_paging(vcpu))
742 return false;
743
744 if (!test_bit(VCPU_EXREG_PDPTR,
745 (unsigned long *)&vcpu->arch.regs_avail))
746 return true;
747
748 gfn = (kvm_read_cr3(vcpu) & 0xffffffe0ul) >> PAGE_SHIFT;
749 offset = (kvm_read_cr3(vcpu) & 0xffffffe0ul) & (PAGE_SIZE - 1);
750 r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
751 PFERR_USER_MASK | PFERR_WRITE_MASK);
752 if (r < 0)
753 goto out;
754 changed = memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
755 out:
756
757 return changed;
758 }
759 EXPORT_SYMBOL_GPL(pdptrs_changed);
760
761 int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
762 {
763 unsigned long old_cr0 = kvm_read_cr0(vcpu);
764 unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
765
766 cr0 |= X86_CR0_ET;
767
768 #ifdef CONFIG_X86_64
769 if (cr0 & 0xffffffff00000000UL)
770 return 1;
771 #endif
772
773 cr0 &= ~CR0_RESERVED_BITS;
774
775 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
776 return 1;
777
778 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
779 return 1;
780
781 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
782 #ifdef CONFIG_X86_64
783 if ((vcpu->arch.efer & EFER_LME)) {
784 int cs_db, cs_l;
785
786 if (!is_pae(vcpu))
787 return 1;
788 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
789 if (cs_l)
790 return 1;
791 } else
792 #endif
793 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
794 kvm_read_cr3(vcpu)))
795 return 1;
796 }
797
798 if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
799 return 1;
800
801 kvm_x86_ops->set_cr0(vcpu, cr0);
802
803 if ((cr0 ^ old_cr0) & X86_CR0_PG) {
804 kvm_clear_async_pf_completion_queue(vcpu);
805 kvm_async_pf_hash_reset(vcpu);
806 }
807
808 if ((cr0 ^ old_cr0) & update_bits)
809 kvm_mmu_reset_context(vcpu);
810
811 if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
812 kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
813 !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
814 kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
815
816 return 0;
817 }
818 EXPORT_SYMBOL_GPL(kvm_set_cr0);
819
820 void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
821 {
822 (void)kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0eul) | (msw & 0x0f));
823 }
824 EXPORT_SYMBOL_GPL(kvm_lmsw);
825
826 void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
827 {
828 if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
829 !vcpu->guest_xcr0_loaded) {
830
831 if (vcpu->arch.xcr0 != host_xcr0)
832 xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
833 vcpu->guest_xcr0_loaded = 1;
834 }
835
836 if (static_cpu_has(X86_FEATURE_PKU) &&
837 (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
838 (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU)) &&
839 vcpu->arch.pkru != vcpu->arch.host_pkru)
840 __write_pkru(vcpu->arch.pkru);
841 }
842 EXPORT_SYMBOL_GPL(kvm_load_guest_xcr0);
843
844 void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
845 {
846 if (static_cpu_has(X86_FEATURE_PKU) &&
847 (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
848 (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU))) {
849 vcpu->arch.pkru = rdpkru();
850 if (vcpu->arch.pkru != vcpu->arch.host_pkru)
851 __write_pkru(vcpu->arch.host_pkru);
852 }
853
854 if (vcpu->guest_xcr0_loaded) {
855 if (vcpu->arch.xcr0 != host_xcr0)
856 xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
857 vcpu->guest_xcr0_loaded = 0;
858 }
859 }
860 EXPORT_SYMBOL_GPL(kvm_put_guest_xcr0);
861
862 static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
863 {
864 u64 xcr0 = xcr;
865 u64 old_xcr0 = vcpu->arch.xcr0;
866 u64 valid_bits;
867
868
869 if (index != XCR_XFEATURE_ENABLED_MASK)
870 return 1;
871 if (!(xcr0 & XFEATURE_MASK_FP))
872 return 1;
873 if ((xcr0 & XFEATURE_MASK_YMM) && !(xcr0 & XFEATURE_MASK_SSE))
874 return 1;
875
876
877
878
879
880
881 valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP;
882 if (xcr0 & ~valid_bits)
883 return 1;
884
885 if ((!(xcr0 & XFEATURE_MASK_BNDREGS)) !=
886 (!(xcr0 & XFEATURE_MASK_BNDCSR)))
887 return 1;
888
889 if (xcr0 & XFEATURE_MASK_AVX512) {
890 if (!(xcr0 & XFEATURE_MASK_YMM))
891 return 1;
892 if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
893 return 1;
894 }
895 vcpu->arch.xcr0 = xcr0;
896
897 if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
898 kvm_update_cpuid(vcpu);
899 return 0;
900 }
901
902 int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
903 {
904 if (kvm_x86_ops->get_cpl(vcpu) != 0 ||
905 __kvm_set_xcr(vcpu, index, xcr)) {
906 kvm_inject_gp(vcpu, 0);
907 return 1;
908 }
909 return 0;
910 }
911 EXPORT_SYMBOL_GPL(kvm_set_xcr);
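/*
 * XCR0 writes are only accepted from CPL 0 and must name XCR index 0;
 * the checks in __kvm_set_xcr() mirror the architectural rules: x87
 * state (XFEATURE_MASK_FP) must stay enabled, AVX (YMM) requires SSE,
 * the two MPX components must be enabled or disabled together, and the
 * three AVX-512 components must be set together and only on top of YMM.
 * Any bit outside the guest's supported xcr0 mask is rejected.
 */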
912
913 static u64 kvm_host_cr4_reserved_bits(struct cpuinfo_x86 *c)
914 {
915 u64 reserved_bits = CR4_RESERVED_BITS;
916
917 if (!cpu_has(c, X86_FEATURE_XSAVE))
918 reserved_bits |= X86_CR4_OSXSAVE;
919
920 if (!cpu_has(c, X86_FEATURE_SMEP))
921 reserved_bits |= X86_CR4_SMEP;
922
923 if (!cpu_has(c, X86_FEATURE_SMAP))
924 reserved_bits |= X86_CR4_SMAP;
925
926 if (!cpu_has(c, X86_FEATURE_FSGSBASE))
927 reserved_bits |= X86_CR4_FSGSBASE;
928
929 if (!cpu_has(c, X86_FEATURE_PKU))
930 reserved_bits |= X86_CR4_PKE;
931
932 if (!cpu_has(c, X86_FEATURE_LA57) &&
933 !(cpuid_ecx(0x7) & bit(X86_FEATURE_LA57)))
934 reserved_bits |= X86_CR4_LA57;
935
936 if (!cpu_has(c, X86_FEATURE_UMIP) && !kvm_x86_ops->umip_emulated())
937 reserved_bits |= X86_CR4_UMIP;
938
939 return reserved_bits;
940 }
941
942 static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
943 {
944 if (cr4 & cr4_reserved_bits)
945 return -EINVAL;
946
947 if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE))
948 return -EINVAL;
949
950 if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP))
951 return -EINVAL;
952
953 if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP))
954 return -EINVAL;
955
956 if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE))
957 return -EINVAL;
958
959 if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE))
960 return -EINVAL;
961
962 if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57))
963 return -EINVAL;
964
965 if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP))
966 return -EINVAL;
967
968 return 0;
969 }
970
971 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
972 {
973 unsigned long old_cr4 = kvm_read_cr4(vcpu);
974 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
975 X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;
976
977 if (kvm_valid_cr4(vcpu, cr4))
978 return 1;
979
980 if (is_long_mode(vcpu)) {
981 if (!(cr4 & X86_CR4_PAE))
982 return 1;
983 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
984 && ((cr4 ^ old_cr4) & pdptr_bits)
985 && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
986 kvm_read_cr3(vcpu)))
987 return 1;
988
989 if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
990 if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID))
991 return 1;
992
993
994 if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
995 return 1;
996 }
997
998 if (kvm_x86_ops->set_cr4(vcpu, cr4))
999 return 1;
1000
1001 if (((cr4 ^ old_cr4) & pdptr_bits) ||
1002 (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
1003 kvm_mmu_reset_context(vcpu);
1004
1005 if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE))
1006 kvm_update_cpuid(vcpu);
1007
1008 return 0;
1009 }
1010 EXPORT_SYMBOL_GPL(kvm_set_cr4);
1011
1012 int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
1013 {
1014 bool skip_tlb_flush = false;
1015 #ifdef CONFIG_X86_64
1016 bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
1017
1018 if (pcid_enabled) {
1019 skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH;
1020 cr3 &= ~X86_CR3_PCID_NOFLUSH;
1021 }
1022 #endif
1023
1024 if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
1025 if (!skip_tlb_flush) {
1026 kvm_mmu_sync_roots(vcpu);
1027 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
1028 }
1029 return 0;
1030 }
1031
1032 if (is_long_mode(vcpu) &&
1033 (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63)))
1034 return 1;
1035 else if (is_pae_paging(vcpu) &&
1036 !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
1037 return 1;
1038
1039 kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush);
1040 vcpu->arch.cr3 = cr3;
1041 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
1042
1043 return 0;
1044 }
1045 EXPORT_SYMBOL_GPL(kvm_set_cr3);
1046
1047 int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
1048 {
1049 if (cr8 & CR8_RESERVED_BITS)
1050 return 1;
1051 if (lapic_in_kernel(vcpu))
1052 kvm_lapic_set_tpr(vcpu, cr8);
1053 else
1054 vcpu->arch.cr8 = cr8;
1055 return 0;
1056 }
1057 EXPORT_SYMBOL_GPL(kvm_set_cr8);
1058
1059 unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
1060 {
1061 if (lapic_in_kernel(vcpu))
1062 return kvm_lapic_get_cr8(vcpu);
1063 else
1064 return vcpu->arch.cr8;
1065 }
1066 EXPORT_SYMBOL_GPL(kvm_get_cr8);
1067
1068 static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
1069 {
1070 int i;
1071
1072 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
1073 for (i = 0; i < KVM_NR_DB_REGS; i++)
1074 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
1075 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD;
1076 }
1077 }
1078
1079 static void kvm_update_dr6(struct kvm_vcpu *vcpu)
1080 {
1081 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
1082 kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6);
1083 }
1084
1085 static void kvm_update_dr7(struct kvm_vcpu *vcpu)
1086 {
1087 unsigned long dr7;
1088
1089 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1090 dr7 = vcpu->arch.guest_debug_dr7;
1091 else
1092 dr7 = vcpu->arch.dr7;
1093 kvm_x86_ops->set_dr7(vcpu, dr7);
1094 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_BP_ENABLED;
1095 if (dr7 & DR7_BP_EN_MASK)
1096 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
1097 }
1098
1099 static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
1100 {
1101 u64 fixed = DR6_FIXED_1;
1102
1103 if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))
1104 fixed |= DR6_RTM;
1105 return fixed;
1106 }
1107
1108 static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
1109 {
1110 size_t size = ARRAY_SIZE(vcpu->arch.db);
1111
1112 switch (dr) {
1113 case 0 ... 3:
1114 vcpu->arch.db[array_index_nospec(dr, size)] = val;
1115 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
1116 vcpu->arch.eff_db[dr] = val;
1117 break;
1118 case 4:
1119
1120 case 6:
1121 if (val & 0xffffffff00000000ULL)
1122 return -1;
1123 vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
1124 kvm_update_dr6(vcpu);
1125 break;
1126 case 5:
1127
1128 default:
1129 if (val & 0xffffffff00000000ULL)
1130 return -1;
1131 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
1132 kvm_update_dr7(vcpu);
1133 break;
1134 }
1135
1136 return 0;
1137 }
1138
1139 int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
1140 {
1141 if (__kvm_set_dr(vcpu, dr, val)) {
1142 kvm_inject_gp(vcpu, 0);
1143 return 1;
1144 }
1145 return 0;
1146 }
1147 EXPORT_SYMBOL_GPL(kvm_set_dr);
1148
1149 int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
1150 {
1151 size_t size = ARRAY_SIZE(vcpu->arch.db);
1152
1153 switch (dr) {
1154 case 0 ... 3:
1155 *val = vcpu->arch.db[array_index_nospec(dr, size)];
1156 break;
1157 case 4:
1158
1159 case 6:
1160 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1161 *val = vcpu->arch.dr6;
1162 else
1163 *val = kvm_x86_ops->get_dr6(vcpu);
1164 break;
1165 case 5:
1166
1167 default:
1168 *val = vcpu->arch.dr7;
1169 break;
1170 }
1171 return 0;
1172 }
1173 EXPORT_SYMBOL_GPL(kvm_get_dr);
1174
1175 bool kvm_rdpmc(struct kvm_vcpu *vcpu)
1176 {
1177 u32 ecx = kvm_rcx_read(vcpu);
1178 u64 data;
1179 int err;
1180
1181 err = kvm_pmu_rdpmc(vcpu, ecx, &data);
1182 if (err)
1183 return err;
1184 kvm_rax_write(vcpu, (u32)data);
1185 kvm_rdx_write(vcpu, data >> 32);
1186 return err;
1187 }
1188 EXPORT_SYMBOL_GPL(kvm_rdpmc);
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
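/*
 * Three MSR lists are maintained below: msrs_to_save (hardware-backed
 * MSRs that userspace saves/restores, filtered at init time by
 * kvm_init_msr_list() down to what the host actually supports),
 * emulated_msrs (MSRs KVM emulates in software, so they are offered to
 * userspace even without host support), and msr_based_features
 * (read-only feature MSRs, e.g. the VMX capability MSRs, used to report
 * and negotiate features).
 */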
1202 static const u32 msrs_to_save_all[] = {
1203 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
1204 MSR_STAR,
1205 #ifdef CONFIG_X86_64
1206 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
1207 #endif
1208 MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
1209 MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
1210 MSR_IA32_SPEC_CTRL,
1211 MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH,
1212 MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK,
1213 MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B,
1214 MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
1215 MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
1216 MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
1217 MSR_IA32_UMWAIT_CONTROL,
1218
1219 MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
1220 MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3,
1221 MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
1222 MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
1223 MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
1224 MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3,
1225 MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5,
1226 MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7,
1227 MSR_ARCH_PERFMON_PERFCTR0 + 8, MSR_ARCH_PERFMON_PERFCTR0 + 9,
1228 MSR_ARCH_PERFMON_PERFCTR0 + 10, MSR_ARCH_PERFMON_PERFCTR0 + 11,
1229 MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13,
1230 MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15,
1231 MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17,
1232 MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1,
1233 MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3,
1234 MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5,
1235 MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7,
1236 MSR_ARCH_PERFMON_EVENTSEL0 + 8, MSR_ARCH_PERFMON_EVENTSEL0 + 9,
1237 MSR_ARCH_PERFMON_EVENTSEL0 + 10, MSR_ARCH_PERFMON_EVENTSEL0 + 11,
1238 MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
1239 MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
1240 MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
1241 };
1242
1243 static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
1244 static unsigned num_msrs_to_save;
1245
1246 static const u32 emulated_msrs_all[] = {
1247 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
1248 MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
1249 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
1250 HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
1251 HV_X64_MSR_TSC_FREQUENCY, HV_X64_MSR_APIC_FREQUENCY,
1252 HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
1253 HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
1254 HV_X64_MSR_RESET,
1255 HV_X64_MSR_VP_INDEX,
1256 HV_X64_MSR_VP_RUNTIME,
1257 HV_X64_MSR_SCONTROL,
1258 HV_X64_MSR_STIMER0_CONFIG,
1259 HV_X64_MSR_VP_ASSIST_PAGE,
1260 HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL,
1261 HV_X64_MSR_TSC_EMULATION_STATUS,
1262
1263 MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
1264 MSR_KVM_PV_EOI_EN,
1265
1266 MSR_IA32_TSC_ADJUST,
1267 MSR_IA32_TSCDEADLINE,
1268 MSR_IA32_ARCH_CAPABILITIES,
1269 MSR_IA32_MISC_ENABLE,
1270 MSR_IA32_MCG_STATUS,
1271 MSR_IA32_MCG_CTL,
1272 MSR_IA32_MCG_EXT_CTL,
1273 MSR_IA32_SMBASE,
1274 MSR_SMI_COUNT,
1275 MSR_PLATFORM_INFO,
1276 MSR_MISC_FEATURES_ENABLES,
1277 MSR_AMD64_VIRT_SPEC_CTRL,
1278 MSR_IA32_POWER_CTL,
1279
1280
1281
1282
1283
1284
1285
1286
1287 MSR_IA32_VMX_BASIC,
1288 MSR_IA32_VMX_TRUE_PINBASED_CTLS,
1289 MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
1290 MSR_IA32_VMX_TRUE_EXIT_CTLS,
1291 MSR_IA32_VMX_TRUE_ENTRY_CTLS,
1292 MSR_IA32_VMX_MISC,
1293 MSR_IA32_VMX_CR0_FIXED0,
1294 MSR_IA32_VMX_CR4_FIXED0,
1295 MSR_IA32_VMX_VMCS_ENUM,
1296 MSR_IA32_VMX_PROCBASED_CTLS2,
1297 MSR_IA32_VMX_EPT_VPID_CAP,
1298 MSR_IA32_VMX_VMFUNC,
1299
1300 MSR_K7_HWCR,
1301 MSR_KVM_POLL_CONTROL,
1302 };
1303
1304 static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
1305 static unsigned num_emulated_msrs;
1306
1307
1308
1309
1310
1311 static const u32 msr_based_features_all[] = {
1312 MSR_IA32_VMX_BASIC,
1313 MSR_IA32_VMX_TRUE_PINBASED_CTLS,
1314 MSR_IA32_VMX_PINBASED_CTLS,
1315 MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
1316 MSR_IA32_VMX_PROCBASED_CTLS,
1317 MSR_IA32_VMX_TRUE_EXIT_CTLS,
1318 MSR_IA32_VMX_EXIT_CTLS,
1319 MSR_IA32_VMX_TRUE_ENTRY_CTLS,
1320 MSR_IA32_VMX_ENTRY_CTLS,
1321 MSR_IA32_VMX_MISC,
1322 MSR_IA32_VMX_CR0_FIXED0,
1323 MSR_IA32_VMX_CR0_FIXED1,
1324 MSR_IA32_VMX_CR4_FIXED0,
1325 MSR_IA32_VMX_CR4_FIXED1,
1326 MSR_IA32_VMX_VMCS_ENUM,
1327 MSR_IA32_VMX_PROCBASED_CTLS2,
1328 MSR_IA32_VMX_EPT_VPID_CAP,
1329 MSR_IA32_VMX_VMFUNC,
1330
1331 MSR_F10H_DECFG,
1332 MSR_IA32_UCODE_REV,
1333 MSR_IA32_ARCH_CAPABILITIES,
1334 };
1335
1336 static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
1337 static unsigned int num_msr_based_features;
1338
1339 static u64 kvm_get_arch_capabilities(void)
1340 {
1341 u64 data = 0;
1342
1343 if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
1344 rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
1345
1346
1347
1348
1349
1350
1351
1352 data |= ARCH_CAP_PSCHANGE_MC_NO;
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363 if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER)
1364 data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH;
1365
1366 if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
1367 data |= ARCH_CAP_RDCL_NO;
1368 if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
1369 data |= ARCH_CAP_SSB_NO;
1370 if (!boot_cpu_has_bug(X86_BUG_MDS))
1371 data |= ARCH_CAP_MDS_NO;
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388 if (!boot_cpu_has(X86_FEATURE_RTM))
1389 data &= ~ARCH_CAP_TAA_NO;
1390 else if (!boot_cpu_has_bug(X86_BUG_TAA))
1391 data |= ARCH_CAP_TAA_NO;
1392 else if (data & ARCH_CAP_TSX_CTRL_MSR)
1393 data &= ~ARCH_CAP_MDS_NO;
1394
1395
1396 data &= ~ARCH_CAP_TSX_CTRL_MSR;
1397 return data;
1398 }
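/*
 * The value returned above is the host's IA32_ARCH_CAPABILITIES (if
 * present) plus bits KVM can guarantee on its own: PSCHANGE_MC_NO is
 * always reported, SKIP_VMENTRY_L1DFLUSH is added when the host's L1TF
 * VMENTER flush mitigation is not disabled, RDCL_NO/SSB_NO/MDS_NO are
 * derived from the host's known-bug state, and the TAA/MDS-related bits
 * are adjusted based on host RTM support.  The TSX_CTRL bit is always
 * cleared since that MSR is not exposed to guests here.
 */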
1399
1400 static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
1401 {
1402 switch (msr->index) {
1403 case MSR_IA32_ARCH_CAPABILITIES:
1404 msr->data = kvm_get_arch_capabilities();
1405 break;
1406 case MSR_IA32_UCODE_REV:
1407 rdmsrl_safe(msr->index, &msr->data);
1408 break;
1409 default:
1410 if (kvm_x86_ops->get_msr_feature(msr))
1411 return 1;
1412 }
1413 return 0;
1414 }
1415
1416 static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
1417 {
1418 struct kvm_msr_entry msr;
1419 int r;
1420
1421 msr.index = index;
1422 r = kvm_get_msr_feature(&msr);
1423 if (r)
1424 return r;
1425
1426 *data = msr.data;
1427
1428 return 0;
1429 }
1430
1431 static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
1432 {
1433 if (efer & EFER_FFXSR && !guest_cpuid_has(vcpu, X86_FEATURE_FXSR_OPT))
1434 return false;
1435
1436 if (efer & EFER_SVME && !guest_cpuid_has(vcpu, X86_FEATURE_SVM))
1437 return false;
1438
1439 if (efer & (EFER_LME | EFER_LMA) &&
1440 !guest_cpuid_has(vcpu, X86_FEATURE_LM))
1441 return false;
1442
1443 if (efer & EFER_NX && !guest_cpuid_has(vcpu, X86_FEATURE_NX))
1444 return false;
1445
1446 return true;
1447
1448 }
1449 bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
1450 {
1451 if (efer & efer_reserved_bits)
1452 return false;
1453
1454 return __kvm_valid_efer(vcpu, efer);
1455 }
1456 EXPORT_SYMBOL_GPL(kvm_valid_efer);
1457
1458 static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
1459 {
1460 u64 old_efer = vcpu->arch.efer;
1461 u64 efer = msr_info->data;
1462
1463 if (efer & efer_reserved_bits)
1464 return 1;
1465
1466 if (!msr_info->host_initiated) {
1467 if (!__kvm_valid_efer(vcpu, efer))
1468 return 1;
1469
1470 if (is_paging(vcpu) &&
1471 (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME))
1472 return 1;
1473 }
1474
1475 efer &= ~EFER_LMA;
1476 efer |= vcpu->arch.efer & EFER_LMA;
1477
1478 kvm_x86_ops->set_efer(vcpu, efer);
1479
1480
1481 if ((efer ^ old_efer) & EFER_NX)
1482 kvm_mmu_reset_context(vcpu);
1483
1484 return 0;
1485 }
1486
1487 void kvm_enable_efer_bits(u64 mask)
1488 {
1489 efer_reserved_bits &= ~mask;
1490 }
1491 EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
1492
1493
1494
1495
1496
1497
1498
1499 static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
1500 bool host_initiated)
1501 {
1502 struct msr_data msr;
1503
1504 switch (index) {
1505 case MSR_FS_BASE:
1506 case MSR_GS_BASE:
1507 case MSR_KERNEL_GS_BASE:
1508 case MSR_CSTAR:
1509 case MSR_LSTAR:
1510 if (is_noncanonical_address(data, vcpu))
1511 return 1;
1512 break;
1513 case MSR_IA32_SYSENTER_EIP:
1514 case MSR_IA32_SYSENTER_ESP:
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527 data = get_canonical(data, vcpu_virt_addr_bits(vcpu));
1528 }
1529
1530 msr.data = data;
1531 msr.index = index;
1532 msr.host_initiated = host_initiated;
1533
1534 return kvm_x86_ops->set_msr(vcpu, &msr);
1535 }
1536
1537
1538
1539
1540
1541
1542
1543 static int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
1544 bool host_initiated)
1545 {
1546 struct msr_data msr;
1547 int ret;
1548
1549 msr.index = index;
1550 msr.host_initiated = host_initiated;
1551
1552 ret = kvm_x86_ops->get_msr(vcpu, &msr);
1553 if (!ret)
1554 *data = msr.data;
1555 return ret;
1556 }
1557
1558 int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
1559 {
1560 return __kvm_get_msr(vcpu, index, data, false);
1561 }
1562 EXPORT_SYMBOL_GPL(kvm_get_msr);
1563
1564 int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
1565 {
1566 return __kvm_set_msr(vcpu, index, data, false);
1567 }
1568 EXPORT_SYMBOL_GPL(kvm_set_msr);
1569
1570 int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
1571 {
1572 u32 ecx = kvm_rcx_read(vcpu);
1573 u64 data;
1574
1575 if (kvm_get_msr(vcpu, ecx, &data)) {
1576 trace_kvm_msr_read_ex(ecx);
1577 kvm_inject_gp(vcpu, 0);
1578 return 1;
1579 }
1580
1581 trace_kvm_msr_read(ecx, data);
1582
1583 kvm_rax_write(vcpu, data & -1u);
1584 kvm_rdx_write(vcpu, (data >> 32) & -1u);
1585 return kvm_skip_emulated_instruction(vcpu);
1586 }
1587 EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr);
1588
1589 int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
1590 {
1591 u32 ecx = kvm_rcx_read(vcpu);
1592 u64 data = kvm_read_edx_eax(vcpu);
1593
1594 if (kvm_set_msr(vcpu, ecx, data)) {
1595 trace_kvm_msr_write_ex(ecx, data);
1596 kvm_inject_gp(vcpu, 0);
1597 return 1;
1598 }
1599
1600 trace_kvm_msr_write(ecx, data);
1601 return kvm_skip_emulated_instruction(vcpu);
1602 }
1603 EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
1604
1605
1606
1607
1608 static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
1609 {
1610 return __kvm_get_msr(vcpu, index, data, true);
1611 }
1612
1613 static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
1614 {
1615 return __kvm_set_msr(vcpu, index, *data, true);
1616 }
1617
1618 #ifdef CONFIG_X86_64
1619 struct pvclock_gtod_data {
1620 seqcount_t seq;
1621
1622 struct {
1623 int vclock_mode;
1624 u64 cycle_last;
1625 u64 mask;
1626 u32 mult;
1627 u32 shift;
1628 } clock;
1629
1630 u64 boot_ns;
1631 u64 nsec_base;
1632 u64 wall_time_sec;
1633 };
1634
1635 static struct pvclock_gtod_data pvclock_gtod_data;
1636
1637 static void update_pvclock_gtod(struct timekeeper *tk)
1638 {
1639 struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
1640 u64 boot_ns;
1641
1642 boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
1643
1644 write_seqcount_begin(&vdata->seq);
1645
1646
1647 vdata->clock.vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
1648 vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
1649 vdata->clock.mask = tk->tkr_mono.mask;
1650 vdata->clock.mult = tk->tkr_mono.mult;
1651 vdata->clock.shift = tk->tkr_mono.shift;
1652
1653 vdata->boot_ns = boot_ns;
1654 vdata->nsec_base = tk->tkr_mono.xtime_nsec;
1655
1656 vdata->wall_time_sec = tk->xtime_sec;
1657
1658 write_seqcount_end(&vdata->seq);
1659 }
1660 #endif
1661
1662 void kvm_set_pending_timer(struct kvm_vcpu *vcpu)
1663 {
1664 kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
1665 kvm_vcpu_kick(vcpu);
1666 }
1667
1668 static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
1669 {
1670 int version;
1671 int r;
1672 struct pvclock_wall_clock wc;
1673 struct timespec64 boot;
1674
1675 if (!wall_clock)
1676 return;
1677
1678 r = kvm_read_guest(kvm, wall_clock, &version, sizeof(version));
1679 if (r)
1680 return;
1681
1682 if (version & 1)
1683 ++version;
1684
1685 ++version;
1686
1687 if (kvm_write_guest(kvm, wall_clock, &version, sizeof(version)))
1688 return;
1689
1690
1691
1692
1693
1694
1695
1696 getboottime64(&boot);
1697
1698 if (kvm->arch.kvmclock_offset) {
1699 struct timespec64 ts = ns_to_timespec64(kvm->arch.kvmclock_offset);
1700 boot = timespec64_sub(boot, ts);
1701 }
1702 wc.sec = (u32)boot.tv_sec;
1703 wc.nsec = boot.tv_nsec;
1704 wc.version = version;
1705
1706 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
1707
1708 version++;
1709 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
1710 }
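/*
 * The version field above follows the pvclock seqlock-style protocol:
 * it is bumped to an odd value before the wall-clock data is rewritten
 * and to the next even value afterwards, so a guest that reads an odd
 * version (or sees it change across the read) knows to retry.
 */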
1711
1712 static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
1713 {
1714 do_shl32_div32(dividend, divisor);
1715 return dividend;
1716 }
1717
1718 static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz,
1719 s8 *pshift, u32 *pmultiplier)
1720 {
1721 uint64_t scaled64;
1722 int32_t shift = 0;
1723 uint64_t tps64;
1724 uint32_t tps32;
1725
1726 tps64 = base_hz;
1727 scaled64 = scaled_hz;
1728 while (tps64 > scaled64*2 || tps64 & 0xffffffff00000000ULL) {
1729 tps64 >>= 1;
1730 shift--;
1731 }
1732
1733 tps32 = (uint32_t)tps64;
1734 while (tps32 <= scaled64 || scaled64 & 0xffffffff00000000ULL) {
1735 if (scaled64 & 0xffffffff00000000ULL || tps32 & 0x80000000)
1736 scaled64 >>= 1;
1737 else
1738 tps32 <<= 1;
1739 shift++;
1740 }
1741
1742 *pshift = shift;
1743 *pmultiplier = div_frac(scaled64, tps32);
1744 }
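/*
 * kvm_get_time_scale() produces a (shift, multiplier) pair such that a
 * counter running at base_hz is converted to scaled_hz units as roughly
 * ((value << shift) * multiplier) >> 32, with a negative shift meaning a
 * right shift, matching what pvclock_scale_delta() applies.  Worked
 * example: scaled_hz = 1,500,000,000 and base_hz = 1,000,000,000 yields
 * shift = 1 and multiplier = 0xC0000000 (0.75 in 0.32 fixed point), and
 * indeed (v << 1) * 0.75 = 1.5 * v.
 */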
1745
1746 #ifdef CONFIG_X86_64
1747 static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
1748 #endif
1749
1750 static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
1751 static unsigned long max_tsc_khz;
1752
1753 static u32 adjust_tsc_khz(u32 khz, s32 ppm)
1754 {
1755 u64 v = (u64)khz * (1000000 + ppm);
1756 do_div(v, 1000000);
1757 return v;
1758 }
1759
1760 static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
1761 {
1762 u64 ratio;
1763
1764
1765 if (!scale) {
1766 vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
1767 return 0;
1768 }
1769
1770
1771 if (!kvm_has_tsc_control) {
1772 if (user_tsc_khz > tsc_khz) {
1773 vcpu->arch.tsc_catchup = 1;
1774 vcpu->arch.tsc_always_catchup = 1;
1775 return 0;
1776 } else {
1777 pr_warn_ratelimited("user requested TSC rate below hardware speed\n");
1778 return -1;
1779 }
1780 }
1781
1782
1783 ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits,
1784 user_tsc_khz, tsc_khz);
1785
1786 if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) {
1787 pr_warn_ratelimited("Invalid TSC scaling ratio - virtual-tsc-khz=%u\n",
1788 user_tsc_khz);
1789 return -1;
1790 }
1791
1792 vcpu->arch.tsc_scaling_ratio = ratio;
1793 return 0;
1794 }
1795
1796 static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
1797 {
1798 u32 thresh_lo, thresh_hi;
1799 int use_scaling = 0;
1800
1801
1802 if (user_tsc_khz == 0) {
1803
1804 vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
1805 return -1;
1806 }
1807
1808
1809 kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC,
1810 &vcpu->arch.virtual_tsc_shift,
1811 &vcpu->arch.virtual_tsc_mult);
1812 vcpu->arch.virtual_tsc_khz = user_tsc_khz;
1813
1814
1815
1816
1817
1818
1819
1820 thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
1821 thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
1822 if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) {
1823 pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", user_tsc_khz, thresh_lo, thresh_hi);
1824 use_scaling = 1;
1825 }
1826 return set_tsc_khz(vcpu, user_tsc_khz, use_scaling);
1827 }
1828
1829 static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
1830 {
1831 u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec,
1832 vcpu->arch.virtual_tsc_mult,
1833 vcpu->arch.virtual_tsc_shift);
1834 tsc += vcpu->arch.this_tsc_write;
1835 return tsc;
1836 }
1837
1838 static inline int gtod_is_based_on_tsc(int mode)
1839 {
1840 return mode == VCLOCK_TSC || mode == VCLOCK_HVCLOCK;
1841 }
1842
1843 static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
1844 {
1845 #ifdef CONFIG_X86_64
1846 bool vcpus_matched;
1847 struct kvm_arch *ka = &vcpu->kvm->arch;
1848 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
1849
1850 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
1851 atomic_read(&vcpu->kvm->online_vcpus));
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861 if (ka->use_master_clock ||
1862 (gtod_is_based_on_tsc(gtod->clock.vclock_mode) && vcpus_matched))
1863 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
1864
1865 trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
1866 atomic_read(&vcpu->kvm->online_vcpus),
1867 ka->use_master_clock, gtod->clock.vclock_mode);
1868 #endif
1869 }
1870
1871 static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
1872 {
1873 u64 curr_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
1874 vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
1875 }
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887 static inline u64 __scale_tsc(u64 ratio, u64 tsc)
1888 {
1889 return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits);
1890 }
1891
1892 u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
1893 {
1894 u64 _tsc = tsc;
1895 u64 ratio = vcpu->arch.tsc_scaling_ratio;
1896
1897 if (ratio != kvm_default_tsc_scaling_ratio)
1898 _tsc = __scale_tsc(ratio, tsc);
1899
1900 return _tsc;
1901 }
1902 EXPORT_SYMBOL_GPL(kvm_scale_tsc);
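/*
 * kvm_scale_tsc() applies the fixed-point ratio installed by set_tsc_khz():
 * scaled_tsc = (tsc * ratio) >> kvm_tsc_scaling_ratio_frac_bits, using a
 * 64x64->128-bit multiply (mul_u64_u64_shr) so the intermediate product
 * cannot overflow. The worked example after set_tsc_khz() above shows the
 * same arithmetic with explicit __int128 operations.
 */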
1903
1904 static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
1905 {
1906 u64 tsc;
1907
1908 tsc = kvm_scale_tsc(vcpu, rdtsc());
1909
1910 return target_tsc - tsc;
1911 }
1912
1913 u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
1914 {
1915 u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
1916
1917 return tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
1918 }
1919 EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
1920
1921 static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
1922 {
1923 vcpu->arch.tsc_offset = kvm_x86_ops->write_l1_tsc_offset(vcpu, offset);
1924 }
1925
1926 static inline bool kvm_check_tsc_unstable(void)
1927 {
1928 #ifdef CONFIG_X86_64
1929
1930
1931
1932
1933 if (pvclock_gtod_data.clock.vclock_mode == VCLOCK_HVCLOCK)
1934 return false;
1935 #endif
1936 return check_tsc_unstable();
1937 }
1938
1939 void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
1940 {
1941 struct kvm *kvm = vcpu->kvm;
1942 u64 offset, ns, elapsed;
1943 unsigned long flags;
1944 bool matched;
1945 bool already_matched;
1946 u64 data = msr->data;
1947 bool synchronizing = false;
1948
1949 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
1950 offset = kvm_compute_tsc_offset(vcpu, data);
1951 ns = ktime_get_boottime_ns();
1952 elapsed = ns - kvm->arch.last_tsc_nsec;
1953
1954 if (vcpu->arch.virtual_tsc_khz) {
1955 if (data == 0 && msr->host_initiated) {
1956
1957
1958
1959
1960
1961 synchronizing = true;
1962 } else {
1963 u64 tsc_exp = kvm->arch.last_tsc_write +
1964 nsec_to_cycles(vcpu, elapsed);
1965 u64 tsc_hz = vcpu->arch.virtual_tsc_khz * 1000LL;
1966
1967
1968
1969
1970
1971 synchronizing = data < tsc_exp + tsc_hz &&
1972 data + tsc_hz > tsc_exp;
1973 }
1974 }
1975
1976
1977
1978
1979
1980
1981
1982 if (synchronizing &&
1983 vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
1984 if (!kvm_check_tsc_unstable()) {
1985 offset = kvm->arch.cur_tsc_offset;
1986 } else {
1987 u64 delta = nsec_to_cycles(vcpu, elapsed);
1988 data += delta;
1989 offset = kvm_compute_tsc_offset(vcpu, data);
1990 }
1991 matched = true;
1992 already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation);
1993 } else {
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003 kvm->arch.cur_tsc_generation++;
2004 kvm->arch.cur_tsc_nsec = ns;
2005 kvm->arch.cur_tsc_write = data;
2006 kvm->arch.cur_tsc_offset = offset;
2007 matched = false;
2008 }
2009
2010
2011
2012
2013
2014 kvm->arch.last_tsc_nsec = ns;
2015 kvm->arch.last_tsc_write = data;
2016 kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
2017
2018 vcpu->arch.last_guest_tsc = data;
2019
2020
2021 vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation;
2022 vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
2023 vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
2024
2025 if (!msr->host_initiated && guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST))
2026 update_ia32_tsc_adjust_msr(vcpu, offset);
2027
2028 kvm_vcpu_write_tsc_offset(vcpu, offset);
2029 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
2030
2031 spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
2032 if (!matched) {
2033 kvm->arch.nr_vcpus_matched_tsc = 0;
2034 } else if (!already_matched) {
2035 kvm->arch.nr_vcpus_matched_tsc++;
2036 }
2037
2038 kvm_track_tsc_matching(vcpu);
2039 spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
2040 }
2041
2042 EXPORT_SYMBOL_GPL(kvm_write_tsc);
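/*
 * Summary of the heuristic above (descriptive only): a TSC write is treated
 * as an attempt to synchronize with the previous write when it is either a
 * host-initiated write of 0, or lands within roughly one second (one
 * virtual-TSC-hz worth of cycles) of the value the previous write would have
 * reached by now. Synchronizing writes on a stable host TSC reuse the
 * existing offset so all vCPUs in the generation stay matched; otherwise a
 * new TSC generation is started and the master-clock conditions are
 * re-evaluated via kvm_track_tsc_matching()/pvclock_update_vm_gtod_copy().
 */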
2043
2044 static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
2045 s64 adjustment)
2046 {
2047 u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
2048 kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment);
2049 }
2050
2051 static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
2052 {
2053 if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
2054 WARN_ON(adjustment < 0);
2055 adjustment = kvm_scale_tsc(vcpu, (u64) adjustment);
2056 adjust_tsc_offset_guest(vcpu, adjustment);
2057 }
2058
2059 #ifdef CONFIG_X86_64
2060
2061 static u64 read_tsc(void)
2062 {
2063 u64 ret = (u64)rdtsc_ordered();
2064 u64 last = pvclock_gtod_data.clock.cycle_last;
2065
2066 if (likely(ret >= last))
2067 return ret;
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077 asm volatile ("");
2078 return last;
2079 }
2080
2081 static inline u64 vgettsc(u64 *tsc_timestamp, int *mode)
2082 {
2083 long v;
2084 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
2085 u64 tsc_pg_val;
2086
2087 switch (gtod->clock.vclock_mode) {
2088 case VCLOCK_HVCLOCK:
2089 tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(),
2090 tsc_timestamp);
2091 if (tsc_pg_val != U64_MAX) {
2092
2093 *mode = VCLOCK_HVCLOCK;
2094 v = (tsc_pg_val - gtod->clock.cycle_last) &
2095 gtod->clock.mask;
2096 } else {
2097
2098 *mode = VCLOCK_NONE;
2099 }
2100 break;
2101 case VCLOCK_TSC:
2102 *mode = VCLOCK_TSC;
2103 *tsc_timestamp = read_tsc();
2104 v = (*tsc_timestamp - gtod->clock.cycle_last) &
2105 gtod->clock.mask;
2106 break;
2107 default:
2108 *mode = VCLOCK_NONE;
2109 }
2110
2111 if (*mode == VCLOCK_NONE)
2112 *tsc_timestamp = v = 0;
2113
2114 return v * gtod->clock.mult;
2115 }
2116
2117 static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp)
2118 {
2119 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
2120 unsigned long seq;
2121 int mode;
2122 u64 ns;
2123
2124 do {
2125 seq = read_seqcount_begin(&gtod->seq);
2126 ns = gtod->nsec_base;
2127 ns += vgettsc(tsc_timestamp, &mode);
2128 ns >>= gtod->clock.shift;
2129 ns += gtod->boot_ns;
2130 } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
2131 *t = ns;
2132
2133 return mode;
2134 }
2135
2136 static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp)
2137 {
2138 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
2139 unsigned long seq;
2140 int mode;
2141 u64 ns;
2142
2143 do {
2144 seq = read_seqcount_begin(&gtod->seq);
2145 ts->tv_sec = gtod->wall_time_sec;
2146 ns = gtod->nsec_base;
2147 ns += vgettsc(tsc_timestamp, &mode);
2148 ns >>= gtod->clock.shift;
2149 } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
2150
2151 ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
2152 ts->tv_nsec = ns;
2153
2154 return mode;
2155 }
2156
2157
2158 static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp)
2159 {
2160
2161 if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
2162 return false;
2163
2164 return gtod_is_based_on_tsc(do_monotonic_boot(kernel_ns,
2165 tsc_timestamp));
2166 }
2167
2168
2169 static bool kvm_get_walltime_and_clockread(struct timespec64 *ts,
2170 u64 *tsc_timestamp)
2171 {
2172
2173 if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
2174 return false;
2175
2176 return gtod_is_based_on_tsc(do_realtime(ts, tsc_timestamp));
2177 }
2178 #endif
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221 static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
2222 {
2223 #ifdef CONFIG_X86_64
2224 struct kvm_arch *ka = &kvm->arch;
2225 int vclock_mode;
2226 bool host_tsc_clocksource, vcpus_matched;
2227
2228 vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
2229 atomic_read(&kvm->online_vcpus));
2230
2231
2232
2233
2234
2235 host_tsc_clocksource = kvm_get_time_and_clockread(
2236 &ka->master_kernel_ns,
2237 &ka->master_cycle_now);
2238
2239 ka->use_master_clock = host_tsc_clocksource && vcpus_matched
2240 && !ka->backwards_tsc_observed
2241 && !ka->boot_vcpu_runs_old_kvmclock;
2242
2243 if (ka->use_master_clock)
2244 atomic_set(&kvm_guest_has_master_clock, 1);
2245
2246 vclock_mode = pvclock_gtod_data.clock.vclock_mode;
2247 trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
2248 vcpus_matched);
2249 #endif
2250 }
2251
2252 void kvm_make_mclock_inprogress_request(struct kvm *kvm)
2253 {
2254 kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
2255 }
2256
2257 static void kvm_gen_update_masterclock(struct kvm *kvm)
2258 {
2259 #ifdef CONFIG_X86_64
2260 int i;
2261 struct kvm_vcpu *vcpu;
2262 struct kvm_arch *ka = &kvm->arch;
2263
2264 spin_lock(&ka->pvclock_gtod_sync_lock);
2265 kvm_make_mclock_inprogress_request(kvm);
2266
2267 pvclock_update_vm_gtod_copy(kvm);
2268
2269 kvm_for_each_vcpu(i, vcpu, kvm)
2270 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2271
2272
2273 kvm_for_each_vcpu(i, vcpu, kvm)
2274 kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
2275
2276 spin_unlock(&ka->pvclock_gtod_sync_lock);
2277 #endif
2278 }
2279
2280 u64 get_kvmclock_ns(struct kvm *kvm)
2281 {
2282 struct kvm_arch *ka = &kvm->arch;
2283 struct pvclock_vcpu_time_info hv_clock;
2284 u64 ret;
2285
2286 spin_lock(&ka->pvclock_gtod_sync_lock);
2287 if (!ka->use_master_clock) {
2288 spin_unlock(&ka->pvclock_gtod_sync_lock);
2289 return ktime_get_boottime_ns() + ka->kvmclock_offset;
2290 }
2291
2292 hv_clock.tsc_timestamp = ka->master_cycle_now;
2293 hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
2294 spin_unlock(&ka->pvclock_gtod_sync_lock);
2295
2296
2297 get_cpu();
2298
2299 if (__this_cpu_read(cpu_tsc_khz)) {
2300 kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
2301 &hv_clock.tsc_shift,
2302 &hv_clock.tsc_to_system_mul);
2303 ret = __pvclock_read_cycles(&hv_clock, rdtsc());
2304 } else
2305 ret = ktime_get_boottime_ns() + ka->kvmclock_offset;
2306
2307 put_cpu();
2308
2309 return ret;
2310 }
2311
2312 static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
2313 {
2314 struct kvm_vcpu_arch *vcpu = &v->arch;
2315 struct pvclock_vcpu_time_info guest_hv_clock;
2316
2317 if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
2318 &guest_hv_clock, sizeof(guest_hv_clock))))
2319 return;
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335 BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
2336
2337 if (guest_hv_clock.version & 1)
2338 ++guest_hv_clock.version;
2339
2340 vcpu->hv_clock.version = guest_hv_clock.version + 1;
2341 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
2342 &vcpu->hv_clock,
2343 sizeof(vcpu->hv_clock.version));
2344
2345 smp_wmb();
2346
2347
2348 vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
2349
2350 if (vcpu->pvclock_set_guest_stopped_request) {
2351 vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
2352 vcpu->pvclock_set_guest_stopped_request = false;
2353 }
2354
2355 trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
2356
2357 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
2358 &vcpu->hv_clock,
2359 sizeof(vcpu->hv_clock));
2360
2361 smp_wmb();
2362
2363 vcpu->hv_clock.version++;
2364 kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
2365 &vcpu->hv_clock,
2366 sizeof(vcpu->hv_clock.version));
2367 }
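/*
 * kvm_setup_pvclock_page() publishes the per-vCPU pvclock area with the same
 * odd/even version protocol as the wall clock above. A standalone sketch of
 * how a guest would consume it (illustrative only; the struct mirrors
 * pvclock_vcpu_time_info, and the TSC read and barriers are assumed
 * helpers):
 */
#include <stdint.h>

struct pvclock_sketch {
	volatile uint32_t version;
	uint32_t pad0;
	uint64_t tsc_timestamp;
	uint64_t system_time;
	uint32_t tsc_to_system_mul;
	int8_t   tsc_shift;
	uint8_t  flags;
	uint8_t  pad[2];
};

static uint64_t pvclock_read_sketch(const struct pvclock_sketch *pv,
				    uint64_t (*read_tsc)(void))
{
	uint32_t version;
	uint64_t delta, ns;

	do {
		version = pv->version;
		__sync_synchronize();
		delta = read_tsc() - pv->tsc_timestamp;
		if (pv->tsc_shift < 0)
			delta >>= -pv->tsc_shift;
		else
			delta <<= pv->tsc_shift;
		ns = pv->system_time +
		     (uint64_t)(((unsigned __int128)delta *
				 pv->tsc_to_system_mul) >> 32);
		__sync_synchronize();
	} while ((version & 1) || version != pv->version);

	return ns;	/* guest nanoseconds since the kvmclock epoch */
}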
2368
2369 static int kvm_guest_time_update(struct kvm_vcpu *v)
2370 {
2371 unsigned long flags, tgt_tsc_khz;
2372 struct kvm_vcpu_arch *vcpu = &v->arch;
2373 struct kvm_arch *ka = &v->kvm->arch;
2374 s64 kernel_ns;
2375 u64 tsc_timestamp, host_tsc;
2376 u8 pvclock_flags;
2377 bool use_master_clock;
2378
2379 kernel_ns = 0;
2380 host_tsc = 0;
2381
2382
2383
2384
2385
2386 spin_lock(&ka->pvclock_gtod_sync_lock);
2387 use_master_clock = ka->use_master_clock;
2388 if (use_master_clock) {
2389 host_tsc = ka->master_cycle_now;
2390 kernel_ns = ka->master_kernel_ns;
2391 }
2392 spin_unlock(&ka->pvclock_gtod_sync_lock);
2393
2394
2395 local_irq_save(flags);
2396 tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz);
2397 if (unlikely(tgt_tsc_khz == 0)) {
2398 local_irq_restore(flags);
2399 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
2400 return 1;
2401 }
2402 if (!use_master_clock) {
2403 host_tsc = rdtsc();
2404 kernel_ns = ktime_get_boottime_ns();
2405 }
2406
2407 tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419 if (vcpu->tsc_catchup) {
2420 u64 tsc = compute_guest_tsc(v, kernel_ns);
2421 if (tsc > tsc_timestamp) {
2422 adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
2423 tsc_timestamp = tsc;
2424 }
2425 }
2426
2427 local_irq_restore(flags);
2428
2429
2430
2431 if (kvm_has_tsc_control)
2432 tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
2433
2434 if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
2435 kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
2436 &vcpu->hv_clock.tsc_shift,
2437 &vcpu->hv_clock.tsc_to_system_mul);
2438 vcpu->hw_tsc_khz = tgt_tsc_khz;
2439 }
2440
2441 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
2442 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
2443 vcpu->last_guest_tsc = tsc_timestamp;
2444
2445
2446 pvclock_flags = 0;
2447 if (use_master_clock)
2448 pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
2449
2450 vcpu->hv_clock.flags = pvclock_flags;
2451
2452 if (vcpu->pv_time_enabled)
2453 kvm_setup_pvclock_page(v);
2454 if (v == kvm_get_vcpu(v->kvm, 0))
2455 kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
2456 return 0;
2457 }
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473 #define KVMCLOCK_UPDATE_DELAY msecs_to_jiffies(100)
2474
2475 static void kvmclock_update_fn(struct work_struct *work)
2476 {
2477 int i;
2478 struct delayed_work *dwork = to_delayed_work(work);
2479 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
2480 kvmclock_update_work);
2481 struct kvm *kvm = container_of(ka, struct kvm, arch);
2482 struct kvm_vcpu *vcpu;
2483
2484 kvm_for_each_vcpu(i, vcpu, kvm) {
2485 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
2486 kvm_vcpu_kick(vcpu);
2487 }
2488 }
2489
2490 static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
2491 {
2492 struct kvm *kvm = v->kvm;
2493
2494 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
2495 schedule_delayed_work(&kvm->arch.kvmclock_update_work,
2496 KVMCLOCK_UPDATE_DELAY);
2497 }
2498
2499 #define KVMCLOCK_SYNC_PERIOD (300 * HZ)
2500
2501 static void kvmclock_sync_fn(struct work_struct *work)
2502 {
2503 struct delayed_work *dwork = to_delayed_work(work);
2504 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
2505 kvmclock_sync_work);
2506 struct kvm *kvm = container_of(ka, struct kvm, arch);
2507
2508 if (!kvmclock_periodic_sync)
2509 return;
2510
2511 schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0);
2512 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
2513 KVMCLOCK_SYNC_PERIOD);
2514 }
2515
2516
2517
2518
2519 static bool can_set_mci_status(struct kvm_vcpu *vcpu)
2520 {
2521
2522 if (guest_cpuid_is_amd(vcpu))
2523 return !!(vcpu->arch.msr_hwcr & BIT_ULL(18));
2524
2525 return false;
2526 }
2527
2528 static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2529 {
2530 u64 mcg_cap = vcpu->arch.mcg_cap;
2531 unsigned bank_num = mcg_cap & 0xff;
2532 u32 msr = msr_info->index;
2533 u64 data = msr_info->data;
2534
2535 switch (msr) {
2536 case MSR_IA32_MCG_STATUS:
2537 vcpu->arch.mcg_status = data;
2538 break;
2539 case MSR_IA32_MCG_CTL:
2540 if (!(mcg_cap & MCG_CTL_P) &&
2541 (data || !msr_info->host_initiated))
2542 return 1;
2543 if (data != 0 && data != ~(u64)0)
2544 return 1;
2545 vcpu->arch.mcg_ctl = data;
2546 break;
2547 default:
2548 if (msr >= MSR_IA32_MC0_CTL &&
2549 msr < MSR_IA32_MCx_CTL(bank_num)) {
2550 u32 offset = array_index_nospec(
2551 msr - MSR_IA32_MC0_CTL,
2552 MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL);
2553
2554
2555
2556
2557
2558
2559 if ((offset & 0x3) == 0 &&
2560 data != 0 && (data | (1 << 10)) != ~(u64)0)
2561 return -1;
2562
2563
2564 if (!msr_info->host_initiated &&
2565 (offset & 0x3) == 1 && data != 0) {
2566 if (!can_set_mci_status(vcpu))
2567 return -1;
2568 }
2569
2570 vcpu->arch.mce_banks[offset] = data;
2571 break;
2572 }
2573 return 1;
2574 }
2575 return 0;
2576 }
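/*
 * The default arm above handles the per-bank MSRs: each MCE bank occupies
 * four consecutive MSRs starting at MSR_IA32_MC0_CTL (0x400), and the low
 * two bits of the offset select the register within the bank (CTL, STATUS,
 * ADDR, MISC). A standalone decode sketch (illustrative only):
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	static const char *const reg_name[4] = {
		"CTL", "STATUS", "ADDR", "MISC"
	};
	uint32_t msr = 0x401 + 4 * 2;	/* MC2_STATUS as an example */
	uint32_t offset = msr - 0x400;

	printf("MSR %#x -> bank %u, MC%u_%s\n",
	       msr, offset / 4, offset / 4, reg_name[offset & 3]);
	return 0;
}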
2577
2578 static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
2579 {
2580 struct kvm *kvm = vcpu->kvm;
2581 int lm = is_long_mode(vcpu);
2582 u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
2583 : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
2584 u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
2585 : kvm->arch.xen_hvm_config.blob_size_32;
2586 u32 page_num = data & ~PAGE_MASK;
2587 u64 page_addr = data & PAGE_MASK;
2588 u8 *page;
2589 int r;
2590
2591 r = -E2BIG;
2592 if (page_num >= blob_size)
2593 goto out;
2594 r = -ENOMEM;
2595 page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE);
2596 if (IS_ERR(page)) {
2597 r = PTR_ERR(page);
2598 goto out;
2599 }
2600 if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE))
2601 goto out_free;
2602 r = 0;
2603 out_free:
2604 kfree(page);
2605 out:
2606 return r;
2607 }
2608
2609 static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
2610 {
2611 gpa_t gpa = data & ~0x3f;
2612
2613
2614 if (data & 0x38)
2615 return 1;
2616
2617 vcpu->arch.apf.msr_val = data;
2618
2619 if (!(data & KVM_ASYNC_PF_ENABLED)) {
2620 kvm_clear_async_pf_completion_queue(vcpu);
2621 kvm_async_pf_hash_reset(vcpu);
2622 return 0;
2623 }
2624
2625 if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
2626 sizeof(u32)))
2627 return 1;
2628
2629 vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
2630 vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
2631 kvm_async_pf_wakeup_all(vcpu);
2632 return 0;
2633 }
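/*
 * Layout of MSR_KVM_ASYNC_PF_EN as checked above (descriptive only): bit 0
 * enables async page faults, bit 1 requests delivery even in kernel mode
 * (when clear, faults are only sent while the guest runs in user mode),
 * bit 2 requests delivery as a #PF VM exit for nested setups, bits 3-5 are
 * reserved and must be zero, and the remaining bits form the 64-byte-aligned
 * GPA of the shared 32-bit token word.
 */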
2634
2635 static void kvmclock_reset(struct kvm_vcpu *vcpu)
2636 {
2637 vcpu->arch.pv_time_enabled = false;
2638 vcpu->arch.time = 0;
2639 }
2640
2641 static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
2642 {
2643 ++vcpu->stat.tlb_flush;
2644 kvm_x86_ops->tlb_flush(vcpu, invalidate_gpa);
2645 }
2646
2647 static void record_steal_time(struct kvm_vcpu *vcpu)
2648 {
2649 struct kvm_host_map map;
2650 struct kvm_steal_time *st;
2651
2652 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
2653 return;
2654
2655
2656 if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
2657 &map, &vcpu->arch.st.cache, false))
2658 return;
2659
2660 st = map.hva +
2661 offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
2662
2663
2664
2665
2666
2667 trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
2668 st->preempted & KVM_VCPU_FLUSH_TLB);
2669 if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
2670 kvm_vcpu_flush_tlb(vcpu, false);
2671
2672 vcpu->arch.st.preempted = 0;
2673
2674 if (st->version & 1)
2675 st->version += 1;
2676
2677 st->version += 1;
2678
2679 smp_wmb();
2680
2681 st->steal += current->sched_info.run_delay -
2682 vcpu->arch.st.last_steal;
2683 vcpu->arch.st.last_steal = current->sched_info.run_delay;
2684
2685 smp_wmb();
2686
2687 st->version += 1;
2688
2689 kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
2690 }
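/*
 * Steal-time publication above follows the same odd/even version scheme as
 * the pvclock structures: the version is made odd before st->steal is
 * updated and even afterwards, so the guest can retry torn reads. The
 * xchg() of st->preempted also consumes a KVM_VCPU_FLUSH_TLB request that
 * the guest's PV TLB-flush code may have left for this vCPU while it was
 * preempted, performing the deferred flush here before the flag is cleared.
 */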
2691
2692 int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2693 {
2694 bool pr = false;
2695 u32 msr = msr_info->index;
2696 u64 data = msr_info->data;
2697
2698 switch (msr) {
2699 case MSR_AMD64_NB_CFG:
2700 case MSR_IA32_UCODE_WRITE:
2701 case MSR_VM_HSAVE_PA:
2702 case MSR_AMD64_PATCH_LOADER:
2703 case MSR_AMD64_BU_CFG2:
2704 case MSR_AMD64_DC_CFG:
2705 case MSR_F15H_EX_CFG:
2706 break;
2707
2708 case MSR_IA32_UCODE_REV:
2709 if (msr_info->host_initiated)
2710 vcpu->arch.microcode_version = data;
2711 break;
2712 case MSR_IA32_ARCH_CAPABILITIES:
2713 if (!msr_info->host_initiated)
2714 return 1;
2715 vcpu->arch.arch_capabilities = data;
2716 break;
2717 case MSR_EFER:
2718 return set_efer(vcpu, msr_info);
2719 case MSR_K7_HWCR:
2720 data &= ~(u64)0x40;
2721 data &= ~(u64)0x100;
2722 data &= ~(u64)0x8;
2723
2724
2725 if (data == BIT_ULL(18)) {
2726 vcpu->arch.msr_hwcr = data;
2727 } else if (data != 0) {
2728 vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
2729 data);
2730 return 1;
2731 }
2732 break;
2733 case MSR_FAM10H_MMIO_CONF_BASE:
2734 if (data != 0) {
2735 vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
2736 "0x%llx\n", data);
2737 return 1;
2738 }
2739 break;
2740 case MSR_IA32_DEBUGCTLMSR:
2741 if (!data) {
2742
2743 break;
2744 } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
2745
2746
2747 return 1;
2748 }
2749 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
2750 __func__, data);
2751 break;
2752 case 0x200 ... 0x2ff:
2753 return kvm_mtrr_set_msr(vcpu, msr, data);
2754 case MSR_IA32_APICBASE:
2755 return kvm_set_apic_base(vcpu, msr_info);
2756 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
2757 return kvm_x2apic_msr_write(vcpu, msr, data);
2758 case MSR_IA32_TSCDEADLINE:
2759 kvm_set_lapic_tscdeadline_msr(vcpu, data);
2760 break;
2761 case MSR_IA32_TSC_ADJUST:
2762 if (guest_cpuid_has(vcpu, X86_FEATURE_TSC_ADJUST)) {
2763 if (!msr_info->host_initiated) {
2764 s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
2765 adjust_tsc_offset_guest(vcpu, adj);
2766 }
2767 vcpu->arch.ia32_tsc_adjust_msr = data;
2768 }
2769 break;
2770 case MSR_IA32_MISC_ENABLE:
2771 if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT) &&
2772 ((vcpu->arch.ia32_misc_enable_msr ^ data) & MSR_IA32_MISC_ENABLE_MWAIT)) {
2773 if (!guest_cpuid_has(vcpu, X86_FEATURE_XMM3))
2774 return 1;
2775 vcpu->arch.ia32_misc_enable_msr = data;
2776 kvm_update_cpuid(vcpu);
2777 } else {
2778 vcpu->arch.ia32_misc_enable_msr = data;
2779 }
2780 break;
2781 case MSR_IA32_SMBASE:
2782 if (!msr_info->host_initiated)
2783 return 1;
2784 vcpu->arch.smbase = data;
2785 break;
2786 case MSR_IA32_POWER_CTL:
2787 vcpu->arch.msr_ia32_power_ctl = data;
2788 break;
2789 case MSR_IA32_TSC:
2790 kvm_write_tsc(vcpu, msr_info);
2791 break;
2792 case MSR_SMI_COUNT:
2793 if (!msr_info->host_initiated)
2794 return 1;
2795 vcpu->arch.smi_count = data;
2796 break;
2797 case MSR_KVM_WALL_CLOCK_NEW:
2798 case MSR_KVM_WALL_CLOCK:
2799 vcpu->kvm->arch.wall_clock = data;
2800 kvm_write_wall_clock(vcpu->kvm, data);
2801 break;
2802 case MSR_KVM_SYSTEM_TIME_NEW:
2803 case MSR_KVM_SYSTEM_TIME: {
2804 struct kvm_arch *ka = &vcpu->kvm->arch;
2805
2806 if (vcpu->vcpu_id == 0 && !msr_info->host_initiated) {
2807 bool tmp = (msr == MSR_KVM_SYSTEM_TIME);
2808
2809 if (ka->boot_vcpu_runs_old_kvmclock != tmp)
2810 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
2811
2812 ka->boot_vcpu_runs_old_kvmclock = tmp;
2813 }
2814
2815 vcpu->arch.time = data;
2816 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
2817
2818
2819 vcpu->arch.pv_time_enabled = false;
2820 if (!(data & 1))
2821 break;
2822
2823 if (!kvm_gfn_to_hva_cache_init(vcpu->kvm,
2824 &vcpu->arch.pv_time, data & ~1ULL,
2825 sizeof(struct pvclock_vcpu_time_info)))
2826 vcpu->arch.pv_time_enabled = true;
2827
2828 break;
2829 }
2830 case MSR_KVM_ASYNC_PF_EN:
2831 if (kvm_pv_enable_async_pf(vcpu, data))
2832 return 1;
2833 break;
2834 case MSR_KVM_STEAL_TIME:
2835
2836 if (unlikely(!sched_info_on()))
2837 return 1;
2838
2839 if (data & KVM_STEAL_RESERVED_MASK)
2840 return 1;
2841
2842 vcpu->arch.st.msr_val = data;
2843
2844 if (!(data & KVM_MSR_ENABLED))
2845 break;
2846
2847 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
2848
2849 break;
2850 case MSR_KVM_PV_EOI_EN:
2851 if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8)))
2852 return 1;
2853 break;
2854
2855 case MSR_KVM_POLL_CONTROL:
2856
2857 if (data & (-1ULL << 1))
2858 return 1;
2859
2860 vcpu->arch.msr_kvm_poll_control = data;
2861 break;
2862
2863 case MSR_IA32_MCG_CTL:
2864 case MSR_IA32_MCG_STATUS:
2865 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
2866 return set_msr_mce(vcpu, msr_info);
2867
2868 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
2869 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
2870 pr = true; /* fall through */
2871 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
2872 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
2873 if (kvm_pmu_is_valid_msr(vcpu, msr))
2874 return kvm_pmu_set_msr(vcpu, msr_info);
2875
2876 if (pr || data != 0)
2877 vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
2878 "0x%x data 0x%llx\n", msr, data);
2879 break;
2880 case MSR_K7_CLK_CTL:
2881
2882
2883
2884
2885
2886
2887
2888
2889 break;
2890 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
2891 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
2892 case HV_X64_MSR_CRASH_CTL:
2893 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
2894 case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
2895 case HV_X64_MSR_TSC_EMULATION_CONTROL:
2896 case HV_X64_MSR_TSC_EMULATION_STATUS:
2897 return kvm_hv_set_msr_common(vcpu, msr, data,
2898 msr_info->host_initiated);
2899 case MSR_IA32_BBL_CR_CTL3:
2900
2901
2902
2903 if (report_ignored_msrs)
2904 vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
2905 msr, data);
2906 break;
2907 case MSR_AMD64_OSVW_ID_LENGTH:
2908 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
2909 return 1;
2910 vcpu->arch.osvw.length = data;
2911 break;
2912 case MSR_AMD64_OSVW_STATUS:
2913 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
2914 return 1;
2915 vcpu->arch.osvw.status = data;
2916 break;
2917 case MSR_PLATFORM_INFO:
2918 if (!msr_info->host_initiated ||
2919 (!(data & MSR_PLATFORM_INFO_CPUID_FAULT) &&
2920 cpuid_fault_enabled(vcpu)))
2921 return 1;
2922 vcpu->arch.msr_platform_info = data;
2923 break;
2924 case MSR_MISC_FEATURES_ENABLES:
2925 if (data & ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT ||
2926 (data & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
2927 !supports_cpuid_fault(vcpu)))
2928 return 1;
2929 vcpu->arch.msr_misc_features_enables = data;
2930 break;
2931 default:
2932 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
2933 return xen_hvm_config(vcpu, data);
2934 if (kvm_pmu_is_valid_msr(vcpu, msr))
2935 return kvm_pmu_set_msr(vcpu, msr_info);
2936 if (!ignore_msrs) {
2937 vcpu_debug_ratelimited(vcpu, "unhandled wrmsr: 0x%x data 0x%llx\n",
2938 msr, data);
2939 return 1;
2940 } else {
2941 if (report_ignored_msrs)
2942 vcpu_unimpl(vcpu,
2943 "ignored wrmsr: 0x%x data 0x%llx\n",
2944 msr, data);
2945 break;
2946 }
2947 }
2948 return 0;
2949 }
2950 EXPORT_SYMBOL_GPL(kvm_set_msr_common);
2951
2952 static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
2953 {
2954 u64 data;
2955 u64 mcg_cap = vcpu->arch.mcg_cap;
2956 unsigned bank_num = mcg_cap & 0xff;
2957
2958 switch (msr) {
2959 case MSR_IA32_P5_MC_ADDR:
2960 case MSR_IA32_P5_MC_TYPE:
2961 data = 0;
2962 break;
2963 case MSR_IA32_MCG_CAP:
2964 data = vcpu->arch.mcg_cap;
2965 break;
2966 case MSR_IA32_MCG_CTL:
2967 if (!(mcg_cap & MCG_CTL_P) && !host)
2968 return 1;
2969 data = vcpu->arch.mcg_ctl;
2970 break;
2971 case MSR_IA32_MCG_STATUS:
2972 data = vcpu->arch.mcg_status;
2973 break;
2974 default:
2975 if (msr >= MSR_IA32_MC0_CTL &&
2976 msr < MSR_IA32_MCx_CTL(bank_num)) {
2977 u32 offset = array_index_nospec(
2978 msr - MSR_IA32_MC0_CTL,
2979 MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL);
2980
2981 data = vcpu->arch.mce_banks[offset];
2982 break;
2983 }
2984 return 1;
2985 }
2986 *pdata = data;
2987 return 0;
2988 }
2989
2990 int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2991 {
2992 switch (msr_info->index) {
2993 case MSR_IA32_PLATFORM_ID:
2994 case MSR_IA32_EBL_CR_POWERON:
2995 case MSR_IA32_DEBUGCTLMSR:
2996 case MSR_IA32_LASTBRANCHFROMIP:
2997 case MSR_IA32_LASTBRANCHTOIP:
2998 case MSR_IA32_LASTINTFROMIP:
2999 case MSR_IA32_LASTINTTOIP:
3000 case MSR_K8_SYSCFG:
3001 case MSR_K8_TSEG_ADDR:
3002 case MSR_K8_TSEG_MASK:
3003 case MSR_VM_HSAVE_PA:
3004 case MSR_K8_INT_PENDING_MSG:
3005 case MSR_AMD64_NB_CFG:
3006 case MSR_FAM10H_MMIO_CONF_BASE:
3007 case MSR_AMD64_BU_CFG2:
3008 case MSR_IA32_PERF_CTL:
3009 case MSR_AMD64_DC_CFG:
3010 case MSR_F15H_EX_CFG:
3011 msr_info->data = 0;
3012 break;
3013 case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
3014 case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
3015 case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
3016 case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
3017 case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1:
3018 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
3019 return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
3020 msr_info->data = 0;
3021 break;
3022 case MSR_IA32_UCODE_REV:
3023 msr_info->data = vcpu->arch.microcode_version;
3024 break;
3025 case MSR_IA32_ARCH_CAPABILITIES:
3026 if (!msr_info->host_initiated &&
3027 !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES))
3028 return 1;
3029 msr_info->data = vcpu->arch.arch_capabilities;
3030 break;
3031 case MSR_IA32_POWER_CTL:
3032 msr_info->data = vcpu->arch.msr_ia32_power_ctl;
3033 break;
3034 case MSR_IA32_TSC:
3035 msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset;
3036 break;
3037 case MSR_MTRRcap:
3038 case 0x200 ... 0x2ff:
3039 return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
3040 case 0xcd:
3041 msr_info->data = 3;
3042 break;
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054 case MSR_EBC_FREQUENCY_ID:
3055 msr_info->data = 1 << 24;
3056 break;
3057 case MSR_IA32_APICBASE:
3058 msr_info->data = kvm_get_apic_base(vcpu);
3059 break;
3060 case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
3061 return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data);
3063 case MSR_IA32_TSCDEADLINE:
3064 msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu);
3065 break;
3066 case MSR_IA32_TSC_ADJUST:
3067 msr_info->data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
3068 break;
3069 case MSR_IA32_MISC_ENABLE:
3070 msr_info->data = vcpu->arch.ia32_misc_enable_msr;
3071 break;
3072 case MSR_IA32_SMBASE:
3073 if (!msr_info->host_initiated)
3074 return 1;
3075 msr_info->data = vcpu->arch.smbase;
3076 break;
3077 case MSR_SMI_COUNT:
3078 msr_info->data = vcpu->arch.smi_count;
3079 break;
3080 case MSR_IA32_PERF_STATUS:
3081
3082 msr_info->data = 1000ULL;
3083
3084 msr_info->data |= (((uint64_t)4ULL) << 40);
3085 break;
3086 case MSR_EFER:
3087 msr_info->data = vcpu->arch.efer;
3088 break;
3089 case MSR_KVM_WALL_CLOCK:
3090 case MSR_KVM_WALL_CLOCK_NEW:
3091 msr_info->data = vcpu->kvm->arch.wall_clock;
3092 break;
3093 case MSR_KVM_SYSTEM_TIME:
3094 case MSR_KVM_SYSTEM_TIME_NEW:
3095 msr_info->data = vcpu->arch.time;
3096 break;
3097 case MSR_KVM_ASYNC_PF_EN:
3098 msr_info->data = vcpu->arch.apf.msr_val;
3099 break;
3100 case MSR_KVM_STEAL_TIME:
3101 msr_info->data = vcpu->arch.st.msr_val;
3102 break;
3103 case MSR_KVM_PV_EOI_EN:
3104 msr_info->data = vcpu->arch.pv_eoi.msr_val;
3105 break;
3106 case MSR_KVM_POLL_CONTROL:
3107 msr_info->data = vcpu->arch.msr_kvm_poll_control;
3108 break;
3109 case MSR_IA32_P5_MC_ADDR:
3110 case MSR_IA32_P5_MC_TYPE:
3111 case MSR_IA32_MCG_CAP:
3112 case MSR_IA32_MCG_CTL:
3113 case MSR_IA32_MCG_STATUS:
3114 case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
3115 return get_msr_mce(vcpu, msr_info->index, &msr_info->data,
3116 msr_info->host_initiated);
3117 case MSR_K7_CLK_CTL:
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127 msr_info->data = 0x20000000;
3128 break;
3129 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
3130 case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
3131 case HV_X64_MSR_CRASH_CTL:
3132 case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
3133 case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
3134 case HV_X64_MSR_TSC_EMULATION_CONTROL:
3135 case HV_X64_MSR_TSC_EMULATION_STATUS:
3136 return kvm_hv_get_msr_common(vcpu,
3137 msr_info->index, &msr_info->data,
3138 msr_info->host_initiated);
3140 case MSR_IA32_BBL_CR_CTL3:
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151 msr_info->data = 0xbe702111;
3152 break;
3153 case MSR_AMD64_OSVW_ID_LENGTH:
3154 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3155 return 1;
3156 msr_info->data = vcpu->arch.osvw.length;
3157 break;
3158 case MSR_AMD64_OSVW_STATUS:
3159 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
3160 return 1;
3161 msr_info->data = vcpu->arch.osvw.status;
3162 break;
3163 case MSR_PLATFORM_INFO:
3164 if (!msr_info->host_initiated &&
3165 !vcpu->kvm->arch.guest_can_read_msr_platform_info)
3166 return 1;
3167 msr_info->data = vcpu->arch.msr_platform_info;
3168 break;
3169 case MSR_MISC_FEATURES_ENABLES:
3170 msr_info->data = vcpu->arch.msr_misc_features_enables;
3171 break;
3172 case MSR_K7_HWCR:
3173 msr_info->data = vcpu->arch.msr_hwcr;
3174 break;
3175 default:
3176 if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
3177 return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
3178 if (!ignore_msrs) {
3179 vcpu_debug_ratelimited(vcpu, "unhandled rdmsr: 0x%x\n",
3180 msr_info->index);
3181 return 1;
3182 } else {
3183 if (report_ignored_msrs)
3184 vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n",
3185 msr_info->index);
3186 msr_info->data = 0;
3187 }
3188 break;
3189 }
3190 return 0;
3191 }
3192 EXPORT_SYMBOL_GPL(kvm_get_msr_common);
3193
3194
3195
3196
3197
3198
3199 static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
3200 struct kvm_msr_entry *entries,
3201 int (*do_msr)(struct kvm_vcpu *vcpu,
3202 unsigned index, u64 *data))
3203 {
3204 int i;
3205
3206 for (i = 0; i < msrs->nmsrs; ++i)
3207 if (do_msr(vcpu, entries[i].index, &entries[i].data))
3208 break;
3209
3210 return i;
3211 }
3212
3213
3214
3215
3216
3217
3218 static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
3219 int (*do_msr)(struct kvm_vcpu *vcpu,
3220 unsigned index, u64 *data),
3221 int writeback)
3222 {
3223 struct kvm_msrs msrs;
3224 struct kvm_msr_entry *entries;
3225 int r, n;
3226 unsigned size;
3227
3228 r = -EFAULT;
3229 if (copy_from_user(&msrs, user_msrs, sizeof(msrs)))
3230 goto out;
3231
3232 r = -E2BIG;
3233 if (msrs.nmsrs >= MAX_IO_MSRS)
3234 goto out;
3235
3236 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
3237 entries = memdup_user(user_msrs->entries, size);
3238 if (IS_ERR(entries)) {
3239 r = PTR_ERR(entries);
3240 goto out;
3241 }
3242
3243 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
3244 if (r < 0)
3245 goto out_free;
3246
3247 r = -EFAULT;
3248 if (writeback && copy_to_user(user_msrs->entries, entries, size))
3249 goto out_free;
3250
3251 r = n;
3252
3253 out_free:
3254 kfree(entries);
3255 out:
3256 return r;
3257 }
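/*
 * msr_io() backs the KVM_GET_MSRS/KVM_SET_MSRS vCPU ioctls: userspace passes
 * a struct kvm_msrs header followed by nmsrs kvm_msr_entry slots, and the
 * return value is the number of entries processed. A minimal userspace
 * sketch reading one MSR (illustrative only; error handling is trimmed and
 * vcpu_fd is assumed to be an already-created KVM vCPU descriptor):
 */
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static void read_one_msr(int vcpu_fd, uint32_t index)
{
	struct kvm_msrs *msrs;

	msrs = calloc(1, sizeof(*msrs) + sizeof(struct kvm_msr_entry));
	msrs->nmsrs = 1;
	msrs->entries[0].index = index;

	if (ioctl(vcpu_fd, KVM_GET_MSRS, msrs) == 1)
		printf("MSR %#x = %#llx\n", index,
		       (unsigned long long)msrs->entries[0].data);
	free(msrs);
}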
3258
3259 static inline bool kvm_can_mwait_in_guest(void)
3260 {
3261 return boot_cpu_has(X86_FEATURE_MWAIT) &&
3262 !boot_cpu_has_bug(X86_BUG_MONITOR) &&
3263 boot_cpu_has(X86_FEATURE_ARAT);
3264 }
3265
3266 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
3267 {
3268 int r = 0;
3269
3270 switch (ext) {
3271 case KVM_CAP_IRQCHIP:
3272 case KVM_CAP_HLT:
3273 case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
3274 case KVM_CAP_SET_TSS_ADDR:
3275 case KVM_CAP_EXT_CPUID:
3276 case KVM_CAP_EXT_EMUL_CPUID:
3277 case KVM_CAP_CLOCKSOURCE:
3278 case KVM_CAP_PIT:
3279 case KVM_CAP_NOP_IO_DELAY:
3280 case KVM_CAP_MP_STATE:
3281 case KVM_CAP_SYNC_MMU:
3282 case KVM_CAP_USER_NMI:
3283 case KVM_CAP_REINJECT_CONTROL:
3284 case KVM_CAP_IRQ_INJECT_STATUS:
3285 case KVM_CAP_IOEVENTFD:
3286 case KVM_CAP_IOEVENTFD_NO_LENGTH:
3287 case KVM_CAP_PIT2:
3288 case KVM_CAP_PIT_STATE2:
3289 case KVM_CAP_SET_IDENTITY_MAP_ADDR:
3290 case KVM_CAP_XEN_HVM:
3291 case KVM_CAP_VCPU_EVENTS:
3292 case KVM_CAP_HYPERV:
3293 case KVM_CAP_HYPERV_VAPIC:
3294 case KVM_CAP_HYPERV_SPIN:
3295 case KVM_CAP_HYPERV_SYNIC:
3296 case KVM_CAP_HYPERV_SYNIC2:
3297 case KVM_CAP_HYPERV_VP_INDEX:
3298 case KVM_CAP_HYPERV_EVENTFD:
3299 case KVM_CAP_HYPERV_TLBFLUSH:
3300 case KVM_CAP_HYPERV_SEND_IPI:
3301 case KVM_CAP_HYPERV_CPUID:
3302 case KVM_CAP_PCI_SEGMENT:
3303 case KVM_CAP_DEBUGREGS:
3304 case KVM_CAP_X86_ROBUST_SINGLESTEP:
3305 case KVM_CAP_XSAVE:
3306 case KVM_CAP_ASYNC_PF:
3307 case KVM_CAP_GET_TSC_KHZ:
3308 case KVM_CAP_KVMCLOCK_CTRL:
3309 case KVM_CAP_READONLY_MEM:
3310 case KVM_CAP_HYPERV_TIME:
3311 case KVM_CAP_IOAPIC_POLARITY_IGNORED:
3312 case KVM_CAP_TSC_DEADLINE_TIMER:
3313 case KVM_CAP_DISABLE_QUIRKS:
3314 case KVM_CAP_SET_BOOT_CPU_ID:
3315 case KVM_CAP_SPLIT_IRQCHIP:
3316 case KVM_CAP_IMMEDIATE_EXIT:
3317 case KVM_CAP_PMU_EVENT_FILTER:
3318 case KVM_CAP_GET_MSR_FEATURES:
3319 case KVM_CAP_MSR_PLATFORM_INFO:
3320 case KVM_CAP_EXCEPTION_PAYLOAD:
3321 r = 1;
3322 break;
3323 case KVM_CAP_SYNC_REGS:
3324 r = KVM_SYNC_X86_VALID_FIELDS;
3325 break;
3326 case KVM_CAP_ADJUST_CLOCK:
3327 r = KVM_CLOCK_TSC_STABLE;
3328 break;
3329 case KVM_CAP_X86_DISABLE_EXITS:
3330 r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE |
3331 KVM_X86_DISABLE_EXITS_CSTATE;
3332 if (kvm_can_mwait_in_guest())
3333 r |= KVM_X86_DISABLE_EXITS_MWAIT;
3334 break;
3335 case KVM_CAP_X86_SMM:
3336
3337
3338
3339
3340
3341
3342
3343
3344 r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE);
3345 break;
3346 case KVM_CAP_VAPIC:
3347 r = !kvm_x86_ops->cpu_has_accelerated_tpr();
3348 break;
3349 case KVM_CAP_NR_VCPUS:
3350 r = KVM_SOFT_MAX_VCPUS;
3351 break;
3352 case KVM_CAP_MAX_VCPUS:
3353 r = KVM_MAX_VCPUS;
3354 break;
3355 case KVM_CAP_MAX_VCPU_ID:
3356 r = KVM_MAX_VCPU_ID;
3357 break;
3358 case KVM_CAP_PV_MMU:
3359 r = 0;
3360 break;
3361 case KVM_CAP_MCE:
3362 r = KVM_MAX_MCE_BANKS;
3363 break;
3364 case KVM_CAP_XCRS:
3365 r = boot_cpu_has(X86_FEATURE_XSAVE);
3366 break;
3367 case KVM_CAP_TSC_CONTROL:
3368 r = kvm_has_tsc_control;
3369 break;
3370 case KVM_CAP_X2APIC_API:
3371 r = KVM_X2APIC_API_VALID_FLAGS;
3372 break;
3373 case KVM_CAP_NESTED_STATE:
3374 r = kvm_x86_ops->get_nested_state ?
3375 kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0;
3376 break;
3377 case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
3378 r = kvm_x86_ops->enable_direct_tlbflush != NULL;
3379 break;
3380 case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
3381 r = kvm_x86_ops->nested_enable_evmcs != NULL;
3382 break;
3383 default:
3384 break;
3385 }
3386 return r;
3387
3388 }
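/*
 * The capability table above is what userspace sees through
 * KVM_CHECK_EXTENSION; the returned integer is 0 for "absent" and otherwise
 * capability-specific (a boolean, a limit such as KVM_CAP_MAX_VCPUS, or a
 * flag mask such as KVM_CAP_X86_DISABLE_EXITS). A minimal query sketch
 * (illustrative only; kvm_fd is an open /dev/kvm descriptor, and the same
 * query also works on a VM descriptor):
 */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static void show_vcpu_limits(int kvm_fd)
{
	int soft = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS);
	int hard = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);

	printf("recommended vCPUs: %d, maximum vCPUs: %d\n", soft, hard);
}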
3389
3390 long kvm_arch_dev_ioctl(struct file *filp,
3391 unsigned int ioctl, unsigned long arg)
3392 {
3393 void __user *argp = (void __user *)arg;
3394 long r;
3395
3396 switch (ioctl) {
3397 case KVM_GET_MSR_INDEX_LIST: {
3398 struct kvm_msr_list __user *user_msr_list = argp;
3399 struct kvm_msr_list msr_list;
3400 unsigned n;
3401
3402 r = -EFAULT;
3403 if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
3404 goto out;
3405 n = msr_list.nmsrs;
3406 msr_list.nmsrs = num_msrs_to_save + num_emulated_msrs;
3407 if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
3408 goto out;
3409 r = -E2BIG;
3410 if (n < msr_list.nmsrs)
3411 goto out;
3412 r = -EFAULT;
3413 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
3414 num_msrs_to_save * sizeof(u32)))
3415 goto out;
3416 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
3417 &emulated_msrs,
3418 num_emulated_msrs * sizeof(u32)))
3419 goto out;
3420 r = 0;
3421 break;
3422 }
3423 case KVM_GET_SUPPORTED_CPUID:
3424 case KVM_GET_EMULATED_CPUID: {
3425 struct kvm_cpuid2 __user *cpuid_arg = argp;
3426 struct kvm_cpuid2 cpuid;
3427
3428 r = -EFAULT;
3429 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
3430 goto out;
3431
3432 r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
3433 ioctl);
3434 if (r)
3435 goto out;
3436
3437 r = -EFAULT;
3438 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
3439 goto out;
3440 r = 0;
3441 break;
3442 }
3443 case KVM_X86_GET_MCE_CAP_SUPPORTED:
3444 r = -EFAULT;
3445 if (copy_to_user(argp, &kvm_mce_cap_supported,
3446 sizeof(kvm_mce_cap_supported)))
3447 goto out;
3448 r = 0;
3449 break;
3450 case KVM_GET_MSR_FEATURE_INDEX_LIST: {
3451 struct kvm_msr_list __user *user_msr_list = argp;
3452 struct kvm_msr_list msr_list;
3453 unsigned int n;
3454
3455 r = -EFAULT;
3456 if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
3457 goto out;
3458 n = msr_list.nmsrs;
3459 msr_list.nmsrs = num_msr_based_features;
3460 if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
3461 goto out;
3462 r = -E2BIG;
3463 if (n < msr_list.nmsrs)
3464 goto out;
3465 r = -EFAULT;
3466 if (copy_to_user(user_msr_list->indices, &msr_based_features,
3467 num_msr_based_features * sizeof(u32)))
3468 goto out;
3469 r = 0;
3470 break;
3471 }
3472 case KVM_GET_MSRS:
3473 r = msr_io(NULL, argp, do_get_msr_feature, 1);
3474 break;
3476 default:
3477 r = -EINVAL;
3478 }
3479 out:
3480 return r;
3481 }
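/*
 * KVM_GET_MSR_INDEX_LIST above uses the usual two-step pattern: a first call
 * with a too-small nmsrs fails with E2BIG but reports the required count, so
 * userspace can size the buffer and retry. A minimal sketch (illustrative
 * only; error handling beyond the E2BIG probe is trimmed):
 */
#include <errno.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static struct kvm_msr_list *get_msr_index_list(int kvm_fd)
{
	struct kvm_msr_list probe = { .nmsrs = 0 };
	struct kvm_msr_list *list;

	/* First call: expect failure with E2BIG and probe.nmsrs filled in. */
	if (ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &probe) == 0 ||
	    errno != E2BIG)
		return NULL;

	list = calloc(1, sizeof(*list) + probe.nmsrs * sizeof(__u32));
	list->nmsrs = probe.nmsrs;
	if (ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list) != 0) {
		free(list);
		return NULL;
	}
	return list;	/* list->indices[0..nmsrs-1] are the saveable MSRs */
}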
3482
3483 static void wbinvd_ipi(void *garbage)
3484 {
3485 wbinvd();
3486 }
3487
3488 static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
3489 {
3490 return kvm_arch_has_noncoherent_dma(vcpu->kvm);
3491 }
3492
3493 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3494 {
3495
3496 if (need_emulate_wbinvd(vcpu)) {
3497 if (kvm_x86_ops->has_wbinvd_exit())
3498 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
3499 else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
3500 smp_call_function_single(vcpu->cpu,
3501 wbinvd_ipi, NULL, 1);
3502 }
3503
3504 kvm_x86_ops->vcpu_load(vcpu, cpu);
3505
3506
3507 if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
3508 adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
3509 vcpu->arch.tsc_offset_adjustment = 0;
3510 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
3511 }
3512
3513 if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) {
3514 s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
3515 rdtsc() - vcpu->arch.last_host_tsc;
3516 if (tsc_delta < 0)
3517 mark_tsc_unstable("KVM discovered backwards TSC");
3518
3519 if (kvm_check_tsc_unstable()) {
3520 u64 offset = kvm_compute_tsc_offset(vcpu,
3521 vcpu->arch.last_guest_tsc);
3522 kvm_vcpu_write_tsc_offset(vcpu, offset);
3523 vcpu->arch.tsc_catchup = 1;
3524 }
3525
3526 if (kvm_lapic_hv_timer_in_use(vcpu))
3527 kvm_lapic_restart_hv_timer(vcpu);
3528
3529
3530
3531
3532
3533 if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
3534 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
3535 if (vcpu->cpu != cpu)
3536 kvm_make_request(KVM_REQ_MIGRATE_TIMER, vcpu);
3537 vcpu->cpu = cpu;
3538 }
3539
3540 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
3541 }
3542
3543 static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
3544 {
3545 struct kvm_host_map map;
3546 struct kvm_steal_time *st;
3547
3548 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
3549 return;
3550
3551 if (vcpu->arch.st.preempted)
3552 return;
3553
3554 if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
3555 &vcpu->arch.st.cache, true))
3556 return;
3557
3558 st = map.hva +
3559 offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
3560
3561 st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
3562
3563 kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
3564 }
3565
3566 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3567 {
3568 int idx;
3569
3570 if (vcpu->preempted)
3571 vcpu->arch.preempted_in_kernel = !kvm_x86_ops->get_cpl(vcpu);
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581 pagefault_disable();
3582
3583
3584
3585
3586 idx = srcu_read_lock(&vcpu->kvm->srcu);
3587 kvm_steal_time_set_preempted(vcpu);
3588 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3589 pagefault_enable();
3590 kvm_x86_ops->vcpu_put(vcpu);
3591 vcpu->arch.last_host_tsc = rdtsc();
3592
3593
3594
3595
3596
3597 set_debugreg(0, 6);
3598 }
3599
3600 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
3601 struct kvm_lapic_state *s)
3602 {
3603 if (vcpu->arch.apicv_active)
3604 kvm_x86_ops->sync_pir_to_irr(vcpu);
3605
3606 return kvm_apic_get_state(vcpu, s);
3607 }
3608
3609 static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
3610 struct kvm_lapic_state *s)
3611 {
3612 int r;
3613
3614 r = kvm_apic_set_state(vcpu, s);
3615 if (r)
3616 return r;
3617 update_cr8_intercept(vcpu);
3618
3619 return 0;
3620 }
3621
3622 static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
3623 {
3624 return (!lapic_in_kernel(vcpu) ||
3625 kvm_apic_accept_pic_intr(vcpu));
3626 }
3627
3628
3629
3630
3631
3632
3633
3634 static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
3635 {
3636 return kvm_arch_interrupt_allowed(vcpu) &&
3637 !kvm_cpu_has_interrupt(vcpu) &&
3638 !kvm_event_needs_reinjection(vcpu) &&
3639 kvm_cpu_accept_dm_intr(vcpu);
3640 }
3641
3642 static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
3643 struct kvm_interrupt *irq)
3644 {
3645 if (irq->irq >= KVM_NR_INTERRUPTS)
3646 return -EINVAL;
3647
3648 if (!irqchip_in_kernel(vcpu->kvm)) {
3649 kvm_queue_interrupt(vcpu, irq->irq, false);
3650 kvm_make_request(KVM_REQ_EVENT, vcpu);
3651 return 0;
3652 }
3653
3654
3655
3656
3657
3658 if (pic_in_kernel(vcpu->kvm))
3659 return -ENXIO;
3660
3661 if (vcpu->arch.pending_external_vector != -1)
3662 return -EEXIST;
3663
3664 vcpu->arch.pending_external_vector = irq->irq;
3665 kvm_make_request(KVM_REQ_EVENT, vcpu);
3666 return 0;
3667 }
3668
3669 static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
3670 {
3671 kvm_inject_nmi(vcpu);
3672
3673 return 0;
3674 }
3675
3676 static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu)
3677 {
3678 kvm_make_request(KVM_REQ_SMI, vcpu);
3679
3680 return 0;
3681 }
3682
3683 static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
3684 struct kvm_tpr_access_ctl *tac)
3685 {
3686 if (tac->flags)
3687 return -EINVAL;
3688 vcpu->arch.tpr_access_reporting = !!tac->enabled;
3689 return 0;
3690 }
3691
3692 static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
3693 u64 mcg_cap)
3694 {
3695 int r;
3696 unsigned bank_num = mcg_cap & 0xff, bank;
3697
3698 r = -EINVAL;
3699 if (!bank_num || bank_num > KVM_MAX_MCE_BANKS)
3700 goto out;
3701 if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000))
3702 goto out;
3703 r = 0;
3704 vcpu->arch.mcg_cap = mcg_cap;
3705
3706 if (mcg_cap & MCG_CTL_P)
3707 vcpu->arch.mcg_ctl = ~(u64)0;
3708
3709 for (bank = 0; bank < bank_num; bank++)
3710 vcpu->arch.mce_banks[bank*4] = ~(u64)0;
3711
3712 kvm_x86_ops->setup_mce(vcpu);
3713 out:
3714 return r;
3715 }
3716
3717 static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
3718 struct kvm_x86_mce *mce)
3719 {
3720 u64 mcg_cap = vcpu->arch.mcg_cap;
3721 unsigned bank_num = mcg_cap & 0xff;
3722 u64 *banks = vcpu->arch.mce_banks;
3723
3724 if (mce->bank >= bank_num || !(mce->status & MCI_STATUS_VAL))
3725 return -EINVAL;
3726
3727
3728
3729
3730 if ((mce->status & MCI_STATUS_UC) && (mcg_cap & MCG_CTL_P) &&
3731 vcpu->arch.mcg_ctl != ~(u64)0)
3732 return 0;
3733 banks += 4 * mce->bank;
3734
3735
3736
3737
3738 if ((mce->status & MCI_STATUS_UC) && banks[0] != ~(u64)0)
3739 return 0;
3740 if (mce->status & MCI_STATUS_UC) {
3741 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
3742 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
3743 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
3744 return 0;
3745 }
3746 if (banks[1] & MCI_STATUS_VAL)
3747 mce->status |= MCI_STATUS_OVER;
3748 banks[2] = mce->addr;
3749 banks[3] = mce->misc;
3750 vcpu->arch.mcg_status = mce->mcg_status;
3751 banks[1] = mce->status;
3752 kvm_queue_exception(vcpu, MC_VECTOR);
3753 } else if (!(banks[1] & MCI_STATUS_VAL)
3754 || !(banks[1] & MCI_STATUS_UC)) {
3755 if (banks[1] & MCI_STATUS_VAL)
3756 mce->status |= MCI_STATUS_OVER;
3757 banks[2] = mce->addr;
3758 banks[3] = mce->misc;
3759 banks[1] = mce->status;
3760 } else
3761 banks[1] |= MCI_STATUS_OVER;
3762 return 0;
3763 }
3764
3765 static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
3766 struct kvm_vcpu_events *events)
3767 {
3768 process_nmi(vcpu);
3769
3770
3771
3772
3773
3774
3775
3776 if (kvm_exception_is_soft(vcpu->arch.exception.nr)) {
3777 events->exception.injected = 0;
3778 events->exception.pending = 0;
3779 } else {
3780 events->exception.injected = vcpu->arch.exception.injected;
3781 events->exception.pending = vcpu->arch.exception.pending;
3782
3783
3784
3785
3786
3787 if (!vcpu->kvm->arch.exception_payload_enabled)
3788 events->exception.injected |=
3789 vcpu->arch.exception.pending;
3790 }
3791 events->exception.nr = vcpu->arch.exception.nr;
3792 events->exception.has_error_code = vcpu->arch.exception.has_error_code;
3793 events->exception.error_code = vcpu->arch.exception.error_code;
3794 events->exception_has_payload = vcpu->arch.exception.has_payload;
3795 events->exception_payload = vcpu->arch.exception.payload;
3796
3797 events->interrupt.injected =
3798 vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft;
3799 events->interrupt.nr = vcpu->arch.interrupt.nr;
3800 events->interrupt.soft = 0;
3801 events->interrupt.shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
3802
3803 events->nmi.injected = vcpu->arch.nmi_injected;
3804 events->nmi.pending = vcpu->arch.nmi_pending != 0;
3805 events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
3806 events->nmi.pad = 0;
3807
3808 events->sipi_vector = 0;
3809
3810 events->smi.smm = is_smm(vcpu);
3811 events->smi.pending = vcpu->arch.smi_pending;
3812 events->smi.smm_inside_nmi =
3813 !!(vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK);
3814 events->smi.latched_init = kvm_lapic_latched_init(vcpu);
3815
3816 events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
3817 | KVM_VCPUEVENT_VALID_SHADOW
3818 | KVM_VCPUEVENT_VALID_SMM);
3819 if (vcpu->kvm->arch.exception_payload_enabled)
3820 events->flags |= KVM_VCPUEVENT_VALID_PAYLOAD;
3821
3822 memset(&events->reserved, 0, sizeof(events->reserved));
3823 }
3824
3825 static void kvm_smm_changed(struct kvm_vcpu *vcpu);
3826
3827 static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
3828 struct kvm_vcpu_events *events)
3829 {
3830 if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
3831 | KVM_VCPUEVENT_VALID_SIPI_VECTOR
3832 | KVM_VCPUEVENT_VALID_SHADOW
3833 | KVM_VCPUEVENT_VALID_SMM
3834 | KVM_VCPUEVENT_VALID_PAYLOAD))
3835 return -EINVAL;
3836
3837 if (events->flags & KVM_VCPUEVENT_VALID_PAYLOAD) {
3838 if (!vcpu->kvm->arch.exception_payload_enabled)
3839 return -EINVAL;
3840 if (events->exception.pending)
3841 events->exception.injected = 0;
3842 else
3843 events->exception_has_payload = 0;
3844 } else {
3845 events->exception.pending = 0;
3846 events->exception_has_payload = 0;
3847 }
3848
3849 if ((events->exception.injected || events->exception.pending) &&
3850 (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR))
3851 return -EINVAL;
3852
3853
3854 if (events->flags & KVM_VCPUEVENT_VALID_SMM &&
3855 (events->smi.smm || events->smi.pending) &&
3856 vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
3857 return -EINVAL;
3858
3859 process_nmi(vcpu);
3860 vcpu->arch.exception.injected = events->exception.injected;
3861 vcpu->arch.exception.pending = events->exception.pending;
3862 vcpu->arch.exception.nr = events->exception.nr;
3863 vcpu->arch.exception.has_error_code = events->exception.has_error_code;
3864 vcpu->arch.exception.error_code = events->exception.error_code;
3865 vcpu->arch.exception.has_payload = events->exception_has_payload;
3866 vcpu->arch.exception.payload = events->exception_payload;
3867
3868 vcpu->arch.interrupt.injected = events->interrupt.injected;
3869 vcpu->arch.interrupt.nr = events->interrupt.nr;
3870 vcpu->arch.interrupt.soft = events->interrupt.soft;
3871 if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
3872 kvm_x86_ops->set_interrupt_shadow(vcpu,
3873 events->interrupt.shadow);
3874
3875 vcpu->arch.nmi_injected = events->nmi.injected;
3876 if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
3877 vcpu->arch.nmi_pending = events->nmi.pending;
3878 kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
3879
3880 if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
3881 lapic_in_kernel(vcpu))
3882 vcpu->arch.apic->sipi_vector = events->sipi_vector;
3883
3884 if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
3885 if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
3886 if (events->smi.smm)
3887 vcpu->arch.hflags |= HF_SMM_MASK;
3888 else
3889 vcpu->arch.hflags &= ~HF_SMM_MASK;
3890 kvm_smm_changed(vcpu);
3891 }
3892
3893 vcpu->arch.smi_pending = events->smi.pending;
3894
3895 if (events->smi.smm) {
3896 if (events->smi.smm_inside_nmi)
3897 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
3898 else
3899 vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
3900 if (lapic_in_kernel(vcpu)) {
3901 if (events->smi.latched_init)
3902 set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
3903 else
3904 clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
3905 }
3906 }
3907 }
3908
3909 kvm_make_request(KVM_REQ_EVENT, vcpu);
3910
3911 return 0;
3912 }
3913
3914 static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
3915 struct kvm_debugregs *dbgregs)
3916 {
3917 unsigned long val;
3918
3919 memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
3920 kvm_get_dr(vcpu, 6, &val);
3921 dbgregs->dr6 = val;
3922 dbgregs->dr7 = vcpu->arch.dr7;
3923 dbgregs->flags = 0;
3924 memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
3925 }
3926
3927 static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
3928 struct kvm_debugregs *dbgregs)
3929 {
3930 if (dbgregs->flags)
3931 return -EINVAL;
3932
3933 if (dbgregs->dr6 & ~0xffffffffull)
3934 return -EINVAL;
3935 if (dbgregs->dr7 & ~0xffffffffull)
3936 return -EINVAL;
3937
3938 memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
3939 kvm_update_dr0123(vcpu);
3940 vcpu->arch.dr6 = dbgregs->dr6;
3941 kvm_update_dr6(vcpu);
3942 vcpu->arch.dr7 = dbgregs->dr7;
3943 kvm_update_dr7(vcpu);
3944
3945 return 0;
3946 }
3947
3948 #define XSTATE_COMPACTION_ENABLED (1ULL << 63)
3949
3950 static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
3951 {
3952 struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
3953 u64 xstate_bv = xsave->header.xfeatures;
3954 u64 valid;
3955
3956
3957
3958
3959
3960 memcpy(dest, xsave, XSAVE_HDR_OFFSET);
3961
3962
3963 xstate_bv &= vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE;
3964 *(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv;
3965
3966
3967
3968
3969
3970 valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
3971 while (valid) {
3972 u64 xfeature_mask = valid & -valid;
3973 int xfeature_nr = fls64(xfeature_mask) - 1;
3974 void *src = get_xsave_addr(xsave, xfeature_nr);
3975
3976 if (src) {
3977 u32 size, offset, ecx, edx;
3978 cpuid_count(XSTATE_CPUID, xfeature_nr,
3979 &size, &offset, &ecx, &edx);
3980 if (xfeature_nr == XFEATURE_PKRU)
3981 memcpy(dest + offset, &vcpu->arch.pkru,
3982 sizeof(vcpu->arch.pkru));
3983 else
3984 memcpy(dest + offset, src, size);
3985
3986 }
3987
3988 valid -= xfeature_mask;
3989 }
3990 }
3991
3992 static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
3993 {
3994 struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
3995 u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
3996 u64 valid;
3997
3998
3999
4000
4001
4002 memcpy(xsave, src, XSAVE_HDR_OFFSET);
4003
4004
4005 xsave->header.xfeatures = xstate_bv;
4006 if (boot_cpu_has(X86_FEATURE_XSAVES))
4007 xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
4008
4009
4010
4011
4012
4013 valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
4014 while (valid) {
4015 u64 xfeature_mask = valid & -valid;
4016 int xfeature_nr = fls64(xfeature_mask) - 1;
4017 void *dest = get_xsave_addr(xsave, xfeature_nr);
4018
4019 if (dest) {
4020 u32 size, offset, ecx, edx;
4021 cpuid_count(XSTATE_CPUID, xfeature_nr,
4022 &size, &offset, &ecx, &edx);
4023 if (xfeature_nr == XFEATURE_PKRU)
4024 memcpy(&vcpu->arch.pkru, src + offset,
4025 sizeof(vcpu->arch.pkru));
4026 else
4027 memcpy(dest, src + offset, size);
4028 }
4029
4030 valid -= xfeature_mask;
4031 }
4032 }
4033
4034 static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
4035 struct kvm_xsave *guest_xsave)
4036 {
4037 if (boot_cpu_has(X86_FEATURE_XSAVE)) {
4038 memset(guest_xsave, 0, sizeof(struct kvm_xsave));
4039 fill_xsave((u8 *) guest_xsave->region, vcpu);
4040 } else {
4041 memcpy(guest_xsave->region,
4042 &vcpu->arch.guest_fpu->state.fxsave,
4043 sizeof(struct fxregs_state));
4044 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
4045 XFEATURE_MASK_FPSSE;
4046 }
4047 }
4048
4049 #define XSAVE_MXCSR_OFFSET 24
4050
4051 static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
4052 struct kvm_xsave *guest_xsave)
4053 {
4054 u64 xstate_bv =
4055 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
4056 u32 mxcsr = *(u32 *)&guest_xsave->region[XSAVE_MXCSR_OFFSET / sizeof(u32)];
4057
4058 if (boot_cpu_has(X86_FEATURE_XSAVE)) {
4059
4060
4061
4062
4063 /* Reject xfeatures the host cannot expose and reserved MXCSR bits. */
4064 if (xstate_bv & ~kvm_supported_xcr0() ||
4065 mxcsr & ~mxcsr_feature_mask)
4066 return -EINVAL;
4067 load_xsave(vcpu, (u8 *)guest_xsave->region);
4068 } else {
4069 if (xstate_bv & ~XFEATURE_MASK_FPSSE ||
4070 mxcsr & ~mxcsr_feature_mask)
4071 return -EINVAL;
4072 memcpy(&vcpu->arch.guest_fpu->state.fxsave,
4073 guest_xsave->region, sizeof(struct fxregs_state));
4074 }
4075 return 0;
4076 }
4077
4078 static void kvm_vcpu_ioctl_x86_get_xcrs(struct kvm_vcpu *vcpu,
4079 struct kvm_xcrs *guest_xcrs)
4080 {
4081 if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
4082 guest_xcrs->nr_xcrs = 0;
4083 return;
4084 }
4085
4086 guest_xcrs->nr_xcrs = 1;
4087 guest_xcrs->flags = 0;
4088 guest_xcrs->xcrs[0].xcr = XCR_XFEATURE_ENABLED_MASK;
4089 guest_xcrs->xcrs[0].value = vcpu->arch.xcr0;
4090 }
4091
4092 static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
4093 struct kvm_xcrs *guest_xcrs)
4094 {
4095 int i, r = 0;
4096
4097 if (!boot_cpu_has(X86_FEATURE_XSAVE))
4098 return -EINVAL;
4099
4100 if (guest_xcrs->nr_xcrs > KVM_MAX_XCRS || guest_xcrs->flags)
4101 return -EINVAL;
4102
4103 for (i = 0; i < guest_xcrs->nr_xcrs; i++)
4104 /* Only XCR0 is currently supported. */
4105 if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) {
4106 r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
4107 guest_xcrs->xcrs[i].value);
4108 break;
4109 }
4110 if (r)
4111 r = -EINVAL;
4112 return r;
4113 }
4114
4115
4116
4117 /*
4118  * kvm_set_guest_paused() tells the guest, via its pvclock area, that it
4119  * has been stopped by the hypervisor; it fails if no pvclock area is set up.
4120  */
4121 static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
4122 {
4123 if (!vcpu->arch.pv_time_enabled)
4124 return -EINVAL;
4125 vcpu->arch.pvclock_set_guest_stopped_request = true;
4126 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
4127 return 0;
4128 }
4129
4130 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4131 struct kvm_enable_cap *cap)
4132 {
4133 int r;
4134 uint16_t vmcs_version;
4135 void __user *user_ptr;
4136
4137 if (cap->flags)
4138 return -EINVAL;
4139
4140 switch (cap->cap) {
4141 case KVM_CAP_HYPERV_SYNIC2:
4142 if (cap->args[0])
4143 return -EINVAL;
4144
4145 /* fall through */
4146 case KVM_CAP_HYPERV_SYNIC:
4147 if (!irqchip_in_kernel(vcpu->kvm))
4148 return -EINVAL;
4149 return kvm_hv_activate_synic(vcpu, cap->cap ==
4150 KVM_CAP_HYPERV_SYNIC2);
4151 case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
4152 if (!kvm_x86_ops->nested_enable_evmcs)
4153 return -ENOTTY;
4154 r = kvm_x86_ops->nested_enable_evmcs(vcpu, &vmcs_version);
4155 if (!r) {
4156 user_ptr = (void __user *)(uintptr_t)cap->args[0];
4157 if (copy_to_user(user_ptr, &vmcs_version,
4158 sizeof(vmcs_version)))
4159 r = -EFAULT;
4160 }
4161 return r;
4162 case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
4163 if (!kvm_x86_ops->enable_direct_tlbflush)
4164 return -ENOTTY;
4165
4166 return kvm_x86_ops->enable_direct_tlbflush(vcpu);
4167
4168 default:
4169 return -EINVAL;
4170 }
4171 }
4172
4173 long kvm_arch_vcpu_ioctl(struct file *filp,
4174 unsigned int ioctl, unsigned long arg)
4175 {
4176 struct kvm_vcpu *vcpu = filp->private_data;
4177 void __user *argp = (void __user *)arg;
4178 int r;
4179 union {
4180 struct kvm_lapic_state *lapic;
4181 struct kvm_xsave *xsave;
4182 struct kvm_xcrs *xcrs;
4183 void *buffer;
4184 } u;
4185
4186 vcpu_load(vcpu);
4187
4188 u.buffer = NULL;
4189 switch (ioctl) {
4190 case KVM_GET_LAPIC: {
4191 r = -EINVAL;
4192 if (!lapic_in_kernel(vcpu))
4193 goto out;
4194 u.lapic = kzalloc(sizeof(struct kvm_lapic_state),
4195 GFP_KERNEL_ACCOUNT);
4196
4197 r = -ENOMEM;
4198 if (!u.lapic)
4199 goto out;
4200 r = kvm_vcpu_ioctl_get_lapic(vcpu, u.lapic);
4201 if (r)
4202 goto out;
4203 r = -EFAULT;
4204 if (copy_to_user(argp, u.lapic, sizeof(struct kvm_lapic_state)))
4205 goto out;
4206 r = 0;
4207 break;
4208 }
4209 case KVM_SET_LAPIC: {
4210 r = -EINVAL;
4211 if (!lapic_in_kernel(vcpu))
4212 goto out;
4213 u.lapic = memdup_user(argp, sizeof(*u.lapic));
4214 if (IS_ERR(u.lapic)) {
4215 r = PTR_ERR(u.lapic);
4216 goto out_nofree;
4217 }
4218
4219 r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
4220 break;
4221 }
4222 case KVM_INTERRUPT: {
4223 struct kvm_interrupt irq;
4224
4225 r = -EFAULT;
4226 if (copy_from_user(&irq, argp, sizeof(irq)))
4227 goto out;
4228 r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
4229 break;
4230 }
4231 case KVM_NMI: {
4232 r = kvm_vcpu_ioctl_nmi(vcpu);
4233 break;
4234 }
4235 case KVM_SMI: {
4236 r = kvm_vcpu_ioctl_smi(vcpu);
4237 break;
4238 }
4239 case KVM_SET_CPUID: {
4240 struct kvm_cpuid __user *cpuid_arg = argp;
4241 struct kvm_cpuid cpuid;
4242
4243 r = -EFAULT;
4244 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4245 goto out;
4246 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
4247 break;
4248 }
4249 case KVM_SET_CPUID2: {
4250 struct kvm_cpuid2 __user *cpuid_arg = argp;
4251 struct kvm_cpuid2 cpuid;
4252
4253 r = -EFAULT;
4254 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4255 goto out;
4256 r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
4257 cpuid_arg->entries);
4258 break;
4259 }
4260 case KVM_GET_CPUID2: {
4261 struct kvm_cpuid2 __user *cpuid_arg = argp;
4262 struct kvm_cpuid2 cpuid;
4263
4264 r = -EFAULT;
4265 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4266 goto out;
4267 r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
4268 cpuid_arg->entries);
4269 if (r)
4270 goto out;
4271 r = -EFAULT;
4272 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
4273 goto out;
4274 r = 0;
4275 break;
4276 }
4277 case KVM_GET_MSRS: {
4278 int idx = srcu_read_lock(&vcpu->kvm->srcu);
4279 r = msr_io(vcpu, argp, do_get_msr, 1);
4280 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4281 break;
4282 }
4283 case KVM_SET_MSRS: {
4284 int idx = srcu_read_lock(&vcpu->kvm->srcu);
4285 r = msr_io(vcpu, argp, do_set_msr, 0);
4286 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4287 break;
4288 }
4289 case KVM_TPR_ACCESS_REPORTING: {
4290 struct kvm_tpr_access_ctl tac;
4291
4292 r = -EFAULT;
4293 if (copy_from_user(&tac, argp, sizeof(tac)))
4294 goto out;
4295 r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
4296 if (r)
4297 goto out;
4298 r = -EFAULT;
4299 if (copy_to_user(argp, &tac, sizeof(tac)))
4300 goto out;
4301 r = 0;
4302 break;
4303 };
4304 case KVM_SET_VAPIC_ADDR: {
4305 struct kvm_vapic_addr va;
4306 int idx;
4307
4308 r = -EINVAL;
4309 if (!lapic_in_kernel(vcpu))
4310 goto out;
4311 r = -EFAULT;
4312 if (copy_from_user(&va, argp, sizeof(va)))
4313 goto out;
4314 idx = srcu_read_lock(&vcpu->kvm->srcu);
4315 r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
4316 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4317 break;
4318 }
4319 case KVM_X86_SETUP_MCE: {
4320 u64 mcg_cap;
4321
4322 r = -EFAULT;
4323 if (copy_from_user(&mcg_cap, argp, sizeof(mcg_cap)))
4324 goto out;
4325 r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
4326 break;
4327 }
4328 case KVM_X86_SET_MCE: {
4329 struct kvm_x86_mce mce;
4330
4331 r = -EFAULT;
4332 if (copy_from_user(&mce, argp, sizeof(mce)))
4333 goto out;
4334 r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
4335 break;
4336 }
4337 case KVM_GET_VCPU_EVENTS: {
4338 struct kvm_vcpu_events events;
4339
4340 kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events);
4341
4342 r = -EFAULT;
4343 if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events)))
4344 break;
4345 r = 0;
4346 break;
4347 }
4348 case KVM_SET_VCPU_EVENTS: {
4349 struct kvm_vcpu_events events;
4350
4351 r = -EFAULT;
4352 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
4353 break;
4354
4355 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
4356 break;
4357 }
4358 case KVM_GET_DEBUGREGS: {
4359 struct kvm_debugregs dbgregs;
4360
4361 kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs);
4362
4363 r = -EFAULT;
4364 if (copy_to_user(argp, &dbgregs,
4365 sizeof(struct kvm_debugregs)))
4366 break;
4367 r = 0;
4368 break;
4369 }
4370 case KVM_SET_DEBUGREGS: {
4371 struct kvm_debugregs dbgregs;
4372
4373 r = -EFAULT;
4374 if (copy_from_user(&dbgregs, argp,
4375 sizeof(struct kvm_debugregs)))
4376 break;
4377
4378 r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs);
4379 break;
4380 }
4381 case KVM_GET_XSAVE: {
4382 u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL_ACCOUNT);
4383 r = -ENOMEM;
4384 if (!u.xsave)
4385 break;
4386
4387 kvm_vcpu_ioctl_x86_get_xsave(vcpu, u.xsave);
4388
4389 r = -EFAULT;
4390 if (copy_to_user(argp, u.xsave, sizeof(struct kvm_xsave)))
4391 break;
4392 r = 0;
4393 break;
4394 }
4395 case KVM_SET_XSAVE: {
4396 u.xsave = memdup_user(argp, sizeof(*u.xsave));
4397 if (IS_ERR(u.xsave)) {
4398 r = PTR_ERR(u.xsave);
4399 goto out_nofree;
4400 }
4401
4402 r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
4403 break;
4404 }
4405 case KVM_GET_XCRS: {
4406 u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL_ACCOUNT);
4407 r = -ENOMEM;
4408 if (!u.xcrs)
4409 break;
4410
4411 kvm_vcpu_ioctl_x86_get_xcrs(vcpu, u.xcrs);
4412
4413 r = -EFAULT;
4414 if (copy_to_user(argp, u.xcrs,
4415 sizeof(struct kvm_xcrs)))
4416 break;
4417 r = 0;
4418 break;
4419 }
4420 case KVM_SET_XCRS: {
4421 u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
4422 if (IS_ERR(u.xcrs)) {
4423 r = PTR_ERR(u.xcrs);
4424 goto out_nofree;
4425 }
4426
4427 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
4428 break;
4429 }
4430 case KVM_SET_TSC_KHZ: {
4431 u32 user_tsc_khz;
4432
4433 r = -EINVAL;
4434 user_tsc_khz = (u32)arg;
4435
4436 if (user_tsc_khz >= kvm_max_guest_tsc_khz)
4437 goto out;
4438
4439 if (user_tsc_khz == 0)
4440 user_tsc_khz = tsc_khz;
4441
4442 if (!kvm_set_tsc_khz(vcpu, user_tsc_khz))
4443 r = 0;
4444
4445 goto out;
4446 }
4447 case KVM_GET_TSC_KHZ: {
4448 r = vcpu->arch.virtual_tsc_khz;
4449 goto out;
4450 }
4451 case KVM_KVMCLOCK_CTRL: {
4452 r = kvm_set_guest_paused(vcpu);
4453 goto out;
4454 }
4455 case KVM_ENABLE_CAP: {
4456 struct kvm_enable_cap cap;
4457
4458 r = -EFAULT;
4459 if (copy_from_user(&cap, argp, sizeof(cap)))
4460 goto out;
4461 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4462 break;
4463 }
4464 case KVM_GET_NESTED_STATE: {
4465 struct kvm_nested_state __user *user_kvm_nested_state = argp;
4466 u32 user_data_size;
4467
4468 r = -EINVAL;
4469 if (!kvm_x86_ops->get_nested_state)
4470 break;
4471
4472 BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size));
4473 r = -EFAULT;
4474 if (get_user(user_data_size, &user_kvm_nested_state->size))
4475 break;
4476
4477 r = kvm_x86_ops->get_nested_state(vcpu, user_kvm_nested_state,
4478 user_data_size);
4479 if (r < 0)
4480 break;
4481
4482 if (r > user_data_size) {
4483 if (put_user(r, &user_kvm_nested_state->size))
4484 r = -EFAULT;
4485 else
4486 r = -E2BIG;
4487 break;
4488 }
4489
4490 r = 0;
4491 break;
4492 }
4493 case KVM_SET_NESTED_STATE: {
4494 struct kvm_nested_state __user *user_kvm_nested_state = argp;
4495 struct kvm_nested_state kvm_state;
4496 int idx;
4497
4498 r = -EINVAL;
4499 if (!kvm_x86_ops->set_nested_state)
4500 break;
4501
4502 r = -EFAULT;
4503 if (copy_from_user(&kvm_state, user_kvm_nested_state, sizeof(kvm_state)))
4504 break;
4505
4506 r = -EINVAL;
4507 if (kvm_state.size < sizeof(kvm_state))
4508 break;
4509
4510 if (kvm_state.flags &
4511 ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE
4512 | KVM_STATE_NESTED_EVMCS))
4513 break;
4514
4515 /* A nested run can only be pending while in guest mode. */
4516 if ((kvm_state.flags & KVM_STATE_NESTED_RUN_PENDING)
4517 && !(kvm_state.flags & KVM_STATE_NESTED_GUEST_MODE))
4518 break;
4519
4520 idx = srcu_read_lock(&vcpu->kvm->srcu);
4521 r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
4522 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4523 break;
4524 }
4525 case KVM_GET_SUPPORTED_HV_CPUID: {
4526 struct kvm_cpuid2 __user *cpuid_arg = argp;
4527 struct kvm_cpuid2 cpuid;
4528
4529 r = -EFAULT;
4530 if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
4531 goto out;
4532
4533 r = kvm_vcpu_ioctl_get_hv_cpuid(vcpu, &cpuid,
4534 cpuid_arg->entries);
4535 if (r)
4536 goto out;
4537
4538 r = -EFAULT;
4539 if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
4540 goto out;
4541 r = 0;
4542 break;
4543 }
4544 default:
4545 r = -EINVAL;
4546 }
4547 out:
4548 kfree(u.buffer);
4549 out_nofree:
4550 vcpu_put(vcpu);
4551 return r;
4552 }
4553
4554 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4555 {
4556 return VM_FAULT_SIGBUS;
4557 }
4558
4559 static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
4560 {
4561 int ret;
4562
4563 if (addr > (unsigned int)(-3 * PAGE_SIZE))
4564 return -EINVAL;
4565 ret = kvm_x86_ops->set_tss_addr(kvm, addr);
4566 return ret;
4567 }
4568
4569 static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
4570 u64 ident_addr)
4571 {
4572 return kvm_x86_ops->set_identity_map_addr(kvm, ident_addr);
4573 }
4574
4575 static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
4576 unsigned long kvm_nr_mmu_pages)
4577 {
4578 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
4579 return -EINVAL;
4580
4581 mutex_lock(&kvm->slots_lock);
4582
4583 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
4584 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
4585
4586 mutex_unlock(&kvm->slots_lock);
4587 return 0;
4588 }
4589
4590 static unsigned long kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
4591 {
4592 return kvm->arch.n_max_mmu_pages;
4593 }
4594
4595 static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
4596 {
4597 struct kvm_pic *pic = kvm->arch.vpic;
4598 int r;
4599
4600 r = 0;
4601 switch (chip->chip_id) {
4602 case KVM_IRQCHIP_PIC_MASTER:
4603 memcpy(&chip->chip.pic, &pic->pics[0],
4604 sizeof(struct kvm_pic_state));
4605 break;
4606 case KVM_IRQCHIP_PIC_SLAVE:
4607 memcpy(&chip->chip.pic, &pic->pics[1],
4608 sizeof(struct kvm_pic_state));
4609 break;
4610 case KVM_IRQCHIP_IOAPIC:
4611 kvm_get_ioapic(kvm, &chip->chip.ioapic);
4612 break;
4613 default:
4614 r = -EINVAL;
4615 break;
4616 }
4617 return r;
4618 }
4619
4620 static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
4621 {
4622 struct kvm_pic *pic = kvm->arch.vpic;
4623 int r;
4624
4625 r = 0;
4626 switch (chip->chip_id) {
4627 case KVM_IRQCHIP_PIC_MASTER:
4628 spin_lock(&pic->lock);
4629 memcpy(&pic->pics[0], &chip->chip.pic,
4630 sizeof(struct kvm_pic_state));
4631 spin_unlock(&pic->lock);
4632 break;
4633 case KVM_IRQCHIP_PIC_SLAVE:
4634 spin_lock(&pic->lock);
4635 memcpy(&pic->pics[1], &chip->chip.pic,
4636 sizeof(struct kvm_pic_state));
4637 spin_unlock(&pic->lock);
4638 break;
4639 case KVM_IRQCHIP_IOAPIC:
4640 kvm_set_ioapic(kvm, &chip->chip.ioapic);
4641 break;
4642 default:
4643 r = -EINVAL;
4644 break;
4645 }
4646 kvm_pic_update_irq(pic);
4647 return r;
4648 }
4649
4650 static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
4651 {
4652 struct kvm_kpit_state *kps = &kvm->arch.vpit->pit_state;
4653
4654 BUILD_BUG_ON(sizeof(*ps) != sizeof(kps->channels));
4655
4656 mutex_lock(&kps->lock);
4657 memcpy(ps, &kps->channels, sizeof(*ps));
4658 mutex_unlock(&kps->lock);
4659 return 0;
4660 }
4661
4662 static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
4663 {
4664 int i;
4665 struct kvm_pit *pit = kvm->arch.vpit;
4666
4667 mutex_lock(&pit->pit_state.lock);
4668 memcpy(&pit->pit_state.channels, ps, sizeof(*ps));
4669 for (i = 0; i < 3; i++)
4670 kvm_pit_load_count(pit, i, ps->channels[i].count, 0);
4671 mutex_unlock(&pit->pit_state.lock);
4672 return 0;
4673 }
4674
4675 static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
4676 {
4677 mutex_lock(&kvm->arch.vpit->pit_state.lock);
4678 memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
4679 sizeof(ps->channels));
4680 ps->flags = kvm->arch.vpit->pit_state.flags;
4681 mutex_unlock(&kvm->arch.vpit->pit_state.lock);
4682 memset(&ps->reserved, 0, sizeof(ps->reserved));
4683 return 0;
4684 }
4685
4686 static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
4687 {
4688 int start = 0;
4689 int i;
4690 u32 prev_legacy, cur_legacy;
4691 struct kvm_pit *pit = kvm->arch.vpit;
4692
4693 mutex_lock(&pit->pit_state.lock);
4694 prev_legacy = pit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
4695 cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
4696 if (!prev_legacy && cur_legacy)
4697 start = 1;
4698 memcpy(&pit->pit_state.channels, &ps->channels,
4699 sizeof(pit->pit_state.channels));
4700 pit->pit_state.flags = ps->flags;
4701 for (i = 0; i < 3; i++)
4702 kvm_pit_load_count(pit, i, pit->pit_state.channels[i].count,
4703 start && i == 0);
4704 mutex_unlock(&pit->pit_state.lock);
4705 return 0;
4706 }
4707
4708 static int kvm_vm_ioctl_reinject(struct kvm *kvm,
4709 struct kvm_reinject_control *control)
4710 {
4711 struct kvm_pit *pit = kvm->arch.vpit;
4712
4713 if (!pit)
4714 return -ENXIO;
4715
4716
4717
4718
4719
4720 mutex_lock(&pit->pit_state.lock);
4721 kvm_pit_set_reinject(pit, control->pit_reinject);
4722 mutex_unlock(&pit->pit_state.lock);
4723
4724 return 0;
4725 }
4726
4727
4728
4729
4730
4731
4732
4733
4734
4735
4736
4737
4738
4739 /*
4740  * kvm_vm_ioctl_get_dirty_log - get the log of dirty pages in a memslot.
4741  *
4742  * Flushes any hardware-buffered dirty state, hands the log to userspace via
4743  * kvm_get_dirty_log_protect(), and flushes remote TLBs if any pages were
4744  * write-protected, all under slots_lock.
4745  */
4746 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
4747 {
4748 bool flush = false;
4749 int r;
4750
4751 mutex_lock(&kvm->slots_lock);
4752
4753
4754
4755 /* Flush any hardware-buffered dirty state into the dirty bitmap first. */
4756 if (kvm_x86_ops->flush_log_dirty)
4757 kvm_x86_ops->flush_log_dirty(kvm);
4758
4759 r = kvm_get_dirty_log_protect(kvm, log, &flush);
4760
4761
4762
4763
4764 /* Flush remote TLBs if any pages were write-protected. */
4765 lockdep_assert_held(&kvm->slots_lock);
4766 if (flush)
4767 kvm_flush_remote_tlbs(kvm);
4768
4769 mutex_unlock(&kvm->slots_lock);
4770 return r;
4771 }
4772
4773 int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log)
4774 {
4775 bool flush = false;
4776 int r;
4777
4778 mutex_lock(&kvm->slots_lock);
4779
4780
4781
4782
4783 if (kvm_x86_ops->flush_log_dirty)
4784 kvm_x86_ops->flush_log_dirty(kvm);
4785
4786 r = kvm_clear_dirty_log_protect(kvm, log, &flush);
4787
4788
4789
4790
4791
4792 lockdep_assert_held(&kvm->slots_lock);
4793 if (flush)
4794 kvm_flush_remote_tlbs(kvm);
4795
4796 mutex_unlock(&kvm->slots_lock);
4797 return r;
4798 }
4799
4800 int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
4801 bool line_status)
4802 {
4803 if (!irqchip_in_kernel(kvm))
4804 return -ENXIO;
4805
4806 irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
4807 irq_event->irq, irq_event->level,
4808 line_status);
4809 return 0;
4810 }
4811
4812 int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
4813 struct kvm_enable_cap *cap)
4814 {
4815 int r;
4816
4817 if (cap->flags)
4818 return -EINVAL;
4819
4820 switch (cap->cap) {
4821 case KVM_CAP_DISABLE_QUIRKS:
4822 kvm->arch.disabled_quirks = cap->args[0];
4823 r = 0;
4824 break;
4825 case KVM_CAP_SPLIT_IRQCHIP: {
4826 mutex_lock(&kvm->lock);
4827 r = -EINVAL;
4828 if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS)
4829 goto split_irqchip_unlock;
4830 r = -EEXIST;
4831 if (irqchip_in_kernel(kvm))
4832 goto split_irqchip_unlock;
4833 if (kvm->created_vcpus)
4834 goto split_irqchip_unlock;
4835 r = kvm_setup_empty_irq_routing(kvm);
4836 if (r)
4837 goto split_irqchip_unlock;
4838
4839 smp_wmb();
4840 kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT;
4841 kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
4842 r = 0;
4843 split_irqchip_unlock:
4844 mutex_unlock(&kvm->lock);
4845 break;
4846 }
4847 case KVM_CAP_X2APIC_API:
4848 r = -EINVAL;
4849 if (cap->args[0] & ~KVM_X2APIC_API_VALID_FLAGS)
4850 break;
4851
4852 if (cap->args[0] & KVM_X2APIC_API_USE_32BIT_IDS)
4853 kvm->arch.x2apic_format = true;
4854 if (cap->args[0] & KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
4855 kvm->arch.x2apic_broadcast_quirk_disabled = true;
4856
4857 r = 0;
4858 break;
4859 case KVM_CAP_X86_DISABLE_EXITS:
4860 r = -EINVAL;
4861 if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS)
4862 break;
4863
4864 if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
4865 kvm_can_mwait_in_guest())
4866 kvm->arch.mwait_in_guest = true;
4867 if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT)
4868 kvm->arch.hlt_in_guest = true;
4869 if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
4870 kvm->arch.pause_in_guest = true;
4871 if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
4872 kvm->arch.cstate_in_guest = true;
4873 r = 0;
4874 break;
4875 case KVM_CAP_MSR_PLATFORM_INFO:
4876 kvm->arch.guest_can_read_msr_platform_info = cap->args[0];
4877 r = 0;
4878 break;
4879 case KVM_CAP_EXCEPTION_PAYLOAD:
4880 kvm->arch.exception_payload_enabled = cap->args[0];
4881 r = 0;
4882 break;
4883 default:
4884 r = -EINVAL;
4885 break;
4886 }
4887 return r;
4888 }
4889
4890 long kvm_arch_vm_ioctl(struct file *filp,
4891 unsigned int ioctl, unsigned long arg)
4892 {
4893 struct kvm *kvm = filp->private_data;
4894 void __user *argp = (void __user *)arg;
4895 int r = -ENOTTY;
4896
4897
4898
4899
4900 /* The union below lets the PIT state buffers share stack space. */
4901 union {
4902 struct kvm_pit_state ps;
4903 struct kvm_pit_state2 ps2;
4904 struct kvm_pit_config pit_config;
4905 } u;
4906
4907 switch (ioctl) {
4908 case KVM_SET_TSS_ADDR:
4909 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
4910 break;
4911 case KVM_SET_IDENTITY_MAP_ADDR: {
4912 u64 ident_addr;
4913
4914 mutex_lock(&kvm->lock);
4915 r = -EINVAL;
4916 if (kvm->created_vcpus)
4917 goto set_identity_unlock;
4918 r = -EFAULT;
4919 if (copy_from_user(&ident_addr, argp, sizeof(ident_addr)))
4920 goto set_identity_unlock;
4921 r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
4922 set_identity_unlock:
4923 mutex_unlock(&kvm->lock);
4924 break;
4925 }
4926 case KVM_SET_NR_MMU_PAGES:
4927 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
4928 break;
4929 case KVM_GET_NR_MMU_PAGES:
4930 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
4931 break;
4932 case KVM_CREATE_IRQCHIP: {
4933 mutex_lock(&kvm->lock);
4934
4935 r = -EEXIST;
4936 if (irqchip_in_kernel(kvm))
4937 goto create_irqchip_unlock;
4938
4939 r = -EINVAL;
4940 if (kvm->created_vcpus)
4941 goto create_irqchip_unlock;
4942
4943 r = kvm_pic_init(kvm);
4944 if (r)
4945 goto create_irqchip_unlock;
4946
4947 r = kvm_ioapic_init(kvm);
4948 if (r) {
4949 kvm_pic_destroy(kvm);
4950 goto create_irqchip_unlock;
4951 }
4952
4953 r = kvm_setup_default_irq_routing(kvm);
4954 if (r) {
4955 kvm_ioapic_destroy(kvm);
4956 kvm_pic_destroy(kvm);
4957 goto create_irqchip_unlock;
4958 }
4959
4960 smp_wmb();
4961 kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL;
4962 create_irqchip_unlock:
4963 mutex_unlock(&kvm->lock);
4964 break;
4965 }
4966 case KVM_CREATE_PIT:
4967 u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
4968 goto create_pit;
4969 case KVM_CREATE_PIT2:
4970 r = -EFAULT;
4971 if (copy_from_user(&u.pit_config, argp,
4972 sizeof(struct kvm_pit_config)))
4973 goto out;
4974 create_pit:
4975 mutex_lock(&kvm->lock);
4976 r = -EEXIST;
4977 if (kvm->arch.vpit)
4978 goto create_pit_unlock;
4979 r = -ENOMEM;
4980 kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
4981 if (kvm->arch.vpit)
4982 r = 0;
4983 create_pit_unlock:
4984 mutex_unlock(&kvm->lock);
4985 break;
4986 case KVM_GET_IRQCHIP: {
4987 /* chip_id selects the PIC master, the PIC slave, or the IOAPIC. */
4988 struct kvm_irqchip *chip;
4989
4990 chip = memdup_user(argp, sizeof(*chip));
4991 if (IS_ERR(chip)) {
4992 r = PTR_ERR(chip);
4993 goto out;
4994 }
4995
4996 r = -ENXIO;
4997 if (!irqchip_kernel(kvm))
4998 goto get_irqchip_out;
4999 r = kvm_vm_ioctl_get_irqchip(kvm, chip);
5000 if (r)
5001 goto get_irqchip_out;
5002 r = -EFAULT;
5003 if (copy_to_user(argp, chip, sizeof(*chip)))
5004 goto get_irqchip_out;
5005 r = 0;
5006 get_irqchip_out:
5007 kfree(chip);
5008 break;
5009 }
5010 case KVM_SET_IRQCHIP: {
5011 /* chip_id selects the PIC master, the PIC slave, or the IOAPIC. */
5012 struct kvm_irqchip *chip;
5013
5014 chip = memdup_user(argp, sizeof(*chip));
5015 if (IS_ERR(chip)) {
5016 r = PTR_ERR(chip);
5017 goto out;
5018 }
5019
5020 r = -ENXIO;
5021 if (!irqchip_kernel(kvm))
5022 goto set_irqchip_out;
5023 r = kvm_vm_ioctl_set_irqchip(kvm, chip);
5024 if (r)
5025 goto set_irqchip_out;
5026 r = 0;
5027 set_irqchip_out:
5028 kfree(chip);
5029 break;
5030 }
5031 case KVM_GET_PIT: {
5032 r = -EFAULT;
5033 if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
5034 goto out;
5035 r = -ENXIO;
5036 if (!kvm->arch.vpit)
5037 goto out;
5038 r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
5039 if (r)
5040 goto out;
5041 r = -EFAULT;
5042 if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
5043 goto out;
5044 r = 0;
5045 break;
5046 }
5047 case KVM_SET_PIT: {
5048 r = -EFAULT;
5049 if (copy_from_user(&u.ps, argp, sizeof(u.ps)))
5050 goto out;
5051 r = -ENXIO;
5052 if (!kvm->arch.vpit)
5053 goto out;
5054 r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
5055 break;
5056 }
5057 case KVM_GET_PIT2: {
5058 r = -ENXIO;
5059 if (!kvm->arch.vpit)
5060 goto out;
5061 r = kvm_vm_ioctl_get_pit2(kvm, &u.ps2);
5062 if (r)
5063 goto out;
5064 r = -EFAULT;
5065 if (copy_to_user(argp, &u.ps2, sizeof(u.ps2)))
5066 goto out;
5067 r = 0;
5068 break;
5069 }
5070 case KVM_SET_PIT2: {
5071 r = -EFAULT;
5072 if (copy_from_user(&u.ps2, argp, sizeof(u.ps2)))
5073 goto out;
5074 r = -ENXIO;
5075 if (!kvm->arch.vpit)
5076 goto out;
5077 r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
5078 break;
5079 }
5080 case KVM_REINJECT_CONTROL: {
5081 struct kvm_reinject_control control;
5082 r = -EFAULT;
5083 if (copy_from_user(&control, argp, sizeof(control)))
5084 goto out;
5085 r = kvm_vm_ioctl_reinject(kvm, &control);
5086 break;
5087 }
5088 case KVM_SET_BOOT_CPU_ID:
5089 r = 0;
5090 mutex_lock(&kvm->lock);
5091 if (kvm->created_vcpus)
5092 r = -EBUSY;
5093 else
5094 kvm->arch.bsp_vcpu_id = arg;
5095 mutex_unlock(&kvm->lock);
5096 break;
5097 case KVM_XEN_HVM_CONFIG: {
5098 struct kvm_xen_hvm_config xhc;
5099 r = -EFAULT;
5100 if (copy_from_user(&xhc, argp, sizeof(xhc)))
5101 goto out;
5102 r = -EINVAL;
5103 if (xhc.flags)
5104 goto out;
5105 memcpy(&kvm->arch.xen_hvm_config, &xhc, sizeof(xhc));
5106 r = 0;
5107 break;
5108 }
5109 case KVM_SET_CLOCK: {
5110 struct kvm_clock_data user_ns;
5111 u64 now_ns;
5112
5113 r = -EFAULT;
5114 if (copy_from_user(&user_ns, argp, sizeof(user_ns)))
5115 goto out;
5116
5117 r = -EINVAL;
5118 if (user_ns.flags)
5119 goto out;
5120
5121 r = 0;
5122
5123
5124
5125
5126
5127 kvm_gen_update_masterclock(kvm);
5128 now_ns = get_kvmclock_ns(kvm);
5129 kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
5130 kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
5131 break;
5132 }
5133 case KVM_GET_CLOCK: {
5134 struct kvm_clock_data user_ns;
5135 u64 now_ns;
5136
5137 now_ns = get_kvmclock_ns(kvm);
5138 user_ns.clock = now_ns;
5139 user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
5140 memset(&user_ns.pad, 0, sizeof(user_ns.pad));
5141
5142 r = -EFAULT;
5143 if (copy_to_user(argp, &user_ns, sizeof(user_ns)))
5144 goto out;
5145 r = 0;
5146 break;
5147 }
5148 case KVM_MEMORY_ENCRYPT_OP: {
5149 r = -ENOTTY;
5150 if (kvm_x86_ops->mem_enc_op)
5151 r = kvm_x86_ops->mem_enc_op(kvm, argp);
5152 break;
5153 }
5154 case KVM_MEMORY_ENCRYPT_REG_REGION: {
5155 struct kvm_enc_region region;
5156
5157 r = -EFAULT;
5158 if (copy_from_user(&region, argp, sizeof(region)))
5159 goto out;
5160
5161 r = -ENOTTY;
5162 if (kvm_x86_ops->mem_enc_reg_region)
5163 r = kvm_x86_ops->mem_enc_reg_region(kvm, &region);
5164 break;
5165 }
5166 case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
5167 struct kvm_enc_region region;
5168
5169 r = -EFAULT;
5170 if (copy_from_user(&region, argp, sizeof(region)))
5171 goto out;
5172
5173 r = -ENOTTY;
5174 if (kvm_x86_ops->mem_enc_unreg_region)
5175 r = kvm_x86_ops->mem_enc_unreg_region(kvm, &region);
5176 break;
5177 }
5178 case KVM_HYPERV_EVENTFD: {
5179 struct kvm_hyperv_eventfd hvevfd;
5180
5181 r = -EFAULT;
5182 if (copy_from_user(&hvevfd, argp, sizeof(hvevfd)))
5183 goto out;
5184 r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd);
5185 break;
5186 }
5187 case KVM_SET_PMU_EVENT_FILTER:
5188 r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
5189 break;
5190 default:
5191 r = -ENOTTY;
5192 }
5193 out:
5194 return r;
5195 }
5196
5197 static void kvm_init_msr_list(void)
5198 {
5199 struct x86_pmu_capability x86_pmu;
5200 u32 dummy[2];
5201 unsigned i;
5202
5203 BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4,
5204 "Please update the fixed PMCs in msrs_to_saved_all[]");
5205
5206 perf_get_x86_pmu_capability(&x86_pmu);
5207
5208 num_msrs_to_save = 0;
5209 num_emulated_msrs = 0;
5210 num_msr_based_features = 0;
5211
5212 for (i = 0; i < ARRAY_SIZE(msrs_to_save_all); i++) {
5213 if (rdmsr_safe(msrs_to_save_all[i], &dummy[0], &dummy[1]) < 0)
5214 continue;
5215
5216
5217
5218
5219 /* Even MSRs that the host can read may not be exposable to guests. */
5220 switch (msrs_to_save_all[i]) {
5221 case MSR_IA32_BNDCFGS:
5222 if (!kvm_mpx_supported())
5223 continue;
5224 break;
5225 case MSR_TSC_AUX:
5226 if (!kvm_x86_ops->rdtscp_supported())
5227 continue;
5228 break;
5229 case MSR_IA32_RTIT_CTL:
5230 case MSR_IA32_RTIT_STATUS:
5231 if (!kvm_x86_ops->pt_supported())
5232 continue;
5233 break;
5234 case MSR_IA32_RTIT_CR3_MATCH:
5235 if (!kvm_x86_ops->pt_supported() ||
5236 !intel_pt_validate_hw_cap(PT_CAP_cr3_filtering))
5237 continue;
5238 break;
5239 case MSR_IA32_RTIT_OUTPUT_BASE:
5240 case MSR_IA32_RTIT_OUTPUT_MASK:
5241 if (!kvm_x86_ops->pt_supported() ||
5242 (!intel_pt_validate_hw_cap(PT_CAP_topa_output) &&
5243 !intel_pt_validate_hw_cap(PT_CAP_single_range_output)))
5244 continue;
5245 break;
5246 case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: {
5247 if (!kvm_x86_ops->pt_supported() ||
5248 msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >=
5249 intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
5250 continue;
5251 break;
5252 case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17:
5253 if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
5254 min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
5255 continue;
5256 break;
5257 case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17:
5258 if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
5259 min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
5260 continue;
5261 }
5262 default:
5263 break;
5264 }
5265
5266 msrs_to_save[num_msrs_to_save++] = msrs_to_save_all[i];
5267 }
5268
5269 for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) {
5270 if (!kvm_x86_ops->has_emulated_msr(emulated_msrs_all[i]))
5271 continue;
5272
5273 emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
5274 }
5275
5276 for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) {
5277 struct kvm_msr_entry msr;
5278
5279 msr.index = msr_based_features_all[i];
5280 if (kvm_get_msr_feature(&msr))
5281 continue;
5282
5283 msr_based_features[num_msr_based_features++] = msr_based_features_all[i];
5284 }
5285 }
5286
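/*
 * Write up to 'len' bytes of guest MMIO in chunks of at most 8 bytes,
 * trying the in-kernel LAPIC before the general MMIO bus; returns the
 * number of bytes that were handled in the kernel.
 */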
5287 static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
5288 const void *v)
5289 {
5290 int handled = 0;
5291 int n;
5292
5293 do {
5294 n = min(len, 8);
5295 if (!(lapic_in_kernel(vcpu) &&
5296 !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v))
5297 && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v))
5298 break;
5299 handled += n;
5300 addr += n;
5301 len -= n;
5302 v += n;
5303 } while (len);
5304
5305 return handled;
5306 }
5307
5308 static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
5309 {
5310 int handled = 0;
5311 int n;
5312
5313 do {
5314 n = min(len, 8);
5315 if (!(lapic_in_kernel(vcpu) &&
5316 !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev,
5317 addr, n, v))
5318 && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
5319 break;
5320 trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
5321 handled += n;
5322 addr += n;
5323 len -= n;
5324 v += n;
5325 } while (len);
5326
5327 return handled;
5328 }
5329
5330 static void kvm_set_segment(struct kvm_vcpu *vcpu,
5331 struct kvm_segment *var, int seg)
5332 {
5333 kvm_x86_ops->set_segment(vcpu, var, seg);
5334 }
5335
5336 void kvm_get_segment(struct kvm_vcpu *vcpu,
5337 struct kvm_segment *var, int seg)
5338 {
5339 kvm_x86_ops->get_segment(vcpu, var, seg);
5340 }
5341
5342 gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
5343 struct x86_exception *exception)
5344 {
5345 gpa_t t_gpa;
5346
5347 BUG_ON(!mmu_is_nested(vcpu));
5348
5349 /* Nested page-table walks are always treated as user-mode accesses. */
5350 access |= PFERR_USER_MASK;
5351 t_gpa = vcpu->arch.mmu->gva_to_gpa(vcpu, gpa, access, exception);
5352
5353 return t_gpa;
5354 }
5355
5356 gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
5357 struct x86_exception *exception)
5358 {
5359 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
5360 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
5361 }
5362
5363 gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
5364 struct x86_exception *exception)
5365 {
5366 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
5367 access |= PFERR_FETCH_MASK;
5368 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
5369 }
5370
5371 gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
5372 struct x86_exception *exception)
5373 {
5374 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
5375 access |= PFERR_WRITE_MASK;
5376 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
5377 }
5378
5379
5380 gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
5381 struct x86_exception *exception)
5382 {
5383 return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
5384 }
5385
5386 static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
5387 struct kvm_vcpu *vcpu, u32 access,
5388 struct x86_exception *exception)
5389 {
5390 void *data = val;
5391 int r = X86EMUL_CONTINUE;
5392
5393 while (bytes) {
5394 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access,
5395 exception);
5396 unsigned offset = addr & (PAGE_SIZE-1);
5397 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
5398 int ret;
5399
5400 if (gpa == UNMAPPED_GVA)
5401 return X86EMUL_PROPAGATE_FAULT;
5402 ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, data,
5403 offset, toread);
5404 if (ret < 0) {
5405 r = X86EMUL_IO_NEEDED;
5406 goto out;
5407 }
5408
5409 bytes -= toread;
5410 data += toread;
5411 addr += toread;
5412 }
5413 out:
5414 return r;
5415 }
5416
5417
5418 static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
5419 gva_t addr, void *val, unsigned int bytes,
5420 struct x86_exception *exception)
5421 {
5422 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5423 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
5424 unsigned offset;
5425 int ret;
5426
5427
5428 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access|PFERR_FETCH_MASK,
5429 exception);
5430 if (unlikely(gpa == UNMAPPED_GVA))
5431 return X86EMUL_PROPAGATE_FAULT;
5432
5433 offset = addr & (PAGE_SIZE-1);
5434 if (WARN_ON(offset + bytes > PAGE_SIZE))
5435 bytes = (unsigned)PAGE_SIZE - offset;
5436 ret = kvm_vcpu_read_guest_page(vcpu, gpa >> PAGE_SHIFT, val,
5437 offset, bytes);
5438 if (unlikely(ret < 0))
5439 return X86EMUL_IO_NEEDED;
5440
5441 return X86EMUL_CONTINUE;
5442 }
5443
5444 int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
5445 gva_t addr, void *val, unsigned int bytes,
5446 struct x86_exception *exception)
5447 {
5448 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
5449
5450
5451
5452
5453
5454
5455
5456 memset(exception, 0, sizeof(*exception));
5457 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
5458 exception);
5459 }
5460 EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
5461
5462 static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
5463 gva_t addr, void *val, unsigned int bytes,
5464 struct x86_exception *exception, bool system)
5465 {
5466 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5467 u32 access = 0;
5468
5469 if (!system && kvm_x86_ops->get_cpl(vcpu) == 3)
5470 access |= PFERR_USER_MASK;
5471
5472 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception);
5473 }
5474
5475 static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
5476 unsigned long addr, void *val, unsigned int bytes)
5477 {
5478 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5479 int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes);
5480
5481 return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
5482 }
5483
5484 static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
5485 struct kvm_vcpu *vcpu, u32 access,
5486 struct x86_exception *exception)
5487 {
5488 void *data = val;
5489 int r = X86EMUL_CONTINUE;
5490
5491 while (bytes) {
5492 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
5493 access,
5494 exception);
5495 unsigned offset = addr & (PAGE_SIZE-1);
5496 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
5497 int ret;
5498
5499 if (gpa == UNMAPPED_GVA)
5500 return X86EMUL_PROPAGATE_FAULT;
5501 ret = kvm_vcpu_write_guest(vcpu, gpa, data, towrite);
5502 if (ret < 0) {
5503 r = X86EMUL_IO_NEEDED;
5504 goto out;
5505 }
5506
5507 bytes -= towrite;
5508 data += towrite;
5509 addr += towrite;
5510 }
5511 out:
5512 return r;
5513 }
5514
5515 static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val,
5516 unsigned int bytes, struct x86_exception *exception,
5517 bool system)
5518 {
5519 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5520 u32 access = PFERR_WRITE_MASK;
5521
5522 if (!system && kvm_x86_ops->get_cpl(vcpu) == 3)
5523 access |= PFERR_USER_MASK;
5524
5525 return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
5526 access, exception);
5527 }
5528
5529 int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
5530 unsigned int bytes, struct x86_exception *exception)
5531 {
5532
5533 vcpu->arch.l1tf_flush_l1d = true;
5534
5535
5536
5537
5538
5539
5540
5541 memset(exception, 0, sizeof(*exception));
5542 return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
5543 PFERR_WRITE_MASK, exception);
5544 }
5545 EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
5546
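/*
 * Handle a #UD intercept.  With force_emulation_prefix enabled, a guest
 * instruction stream beginning with the magic signature requests that the
 * following instruction be emulated.
 */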
5547 int handle_ud(struct kvm_vcpu *vcpu)
5548 {
5549 int emul_type = EMULTYPE_TRAP_UD;
5550 char sig[5];
5551 struct x86_exception e;
5552
5553 if (force_emulation_prefix &&
5554 kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu),
5555 sig, sizeof(sig), &e) == 0 &&
5556 memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) {
5557 kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
5558 emul_type = EMULTYPE_TRAP_UD_FORCED;
5559 }
5560
5561 return kvm_emulate_instruction(vcpu, emul_type);
5562 }
5563 EXPORT_SYMBOL_GPL(handle_ud);
5564
5565 static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
5566 gpa_t gpa, bool write)
5567 {
5568 /* The APIC access page is always treated as MMIO. */
5569 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
5570 return 1;
5571
5572 if (vcpu_match_mmio_gpa(vcpu, gpa)) {
5573 trace_vcpu_match_mmio(gva, gpa, write, true);
5574 return 1;
5575 }
5576
5577 return 0;
5578 }
5579
5580 static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
5581 gpa_t *gpa, struct x86_exception *exception,
5582 bool write)
5583 {
5584 u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
5585 | (write ? PFERR_WRITE_MASK : 0);
5586
5587
5588
5589
5590
5591
5592 if (vcpu_match_mmio_gva(vcpu, gva)
5593 && !permission_fault(vcpu, vcpu->arch.walk_mmu,
5594 vcpu->arch.mmio_access, 0, access)) {
5595 *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
5596 (gva & (PAGE_SIZE - 1));
5597 trace_vcpu_match_mmio(gva, *gpa, write, false);
5598 return 1;
5599 }
5600
5601 *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
5602
5603 if (*gpa == UNMAPPED_GVA)
5604 return -1;
5605
5606 return vcpu_is_mmio_gpa(vcpu, gva, *gpa, write);
5607 }
5608
5609 int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
5610 const void *val, int bytes)
5611 {
5612 int ret;
5613
5614 ret = kvm_vcpu_write_guest(vcpu, gpa, val, bytes);
5615 if (ret < 0)
5616 return 0;
5617 kvm_page_track_write(vcpu, gpa, val, bytes);
5618 return 1;
5619 }
5620
5621 struct read_write_emulator_ops {
5622 int (*read_write_prepare)(struct kvm_vcpu *vcpu, void *val,
5623 int bytes);
5624 int (*read_write_emulate)(struct kvm_vcpu *vcpu, gpa_t gpa,
5625 void *val, int bytes);
5626 int (*read_write_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
5627 int bytes, void *val);
5628 int (*read_write_exit_mmio)(struct kvm_vcpu *vcpu, gpa_t gpa,
5629 void *val, int bytes);
5630 bool write;
5631 };
5632
5633 static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
5634 {
5635 if (vcpu->mmio_read_completed) {
5636 trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
5637 vcpu->mmio_fragments[0].gpa, val);
5638 vcpu->mmio_read_completed = 0;
5639 return 1;
5640 }
5641
5642 return 0;
5643 }
5644
5645 static int read_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
5646 void *val, int bytes)
5647 {
5648 return !kvm_vcpu_read_guest(vcpu, gpa, val, bytes);
5649 }
5650
5651 static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
5652 void *val, int bytes)
5653 {
5654 return emulator_write_phys(vcpu, gpa, val, bytes);
5655 }
5656
5657 static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
5658 {
5659 trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
5660 return vcpu_mmio_write(vcpu, gpa, bytes, val);
5661 }
5662
5663 static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
5664 void *val, int bytes)
5665 {
5666 trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
5667 return X86EMUL_IO_NEEDED;
5668 }
5669
5670 static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
5671 void *val, int bytes)
5672 {
5673 struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
5674
5675 memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
5676 return X86EMUL_CONTINUE;
5677 }
5678
5679 static const struct read_write_emulator_ops read_emultor = {
5680 .read_write_prepare = read_prepare,
5681 .read_write_emulate = read_emulate,
5682 .read_write_mmio = vcpu_mmio_read,
5683 .read_write_exit_mmio = read_exit_mmio,
5684 };
5685
5686 static const struct read_write_emulator_ops write_emultor = {
5687 .read_write_emulate = write_emulate,
5688 .read_write_mmio = write_mmio,
5689 .read_write_exit_mmio = write_exit_mmio,
5690 .write = true,
5691 };
5692
5693 static int emulator_read_write_onepage(unsigned long addr, void *val,
5694 unsigned int bytes,
5695 struct x86_exception *exception,
5696 struct kvm_vcpu *vcpu,
5697 const struct read_write_emulator_ops *ops)
5698 {
5699 gpa_t gpa;
5700 int handled, ret;
5701 bool write = ops->write;
5702 struct kvm_mmio_fragment *frag;
5703 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
5704
5705
5706
5707
5708
5709 /*
5710  * Reuse a GPA supplied by a previous fault exit to skip the GVA->GPA walk.
5711  */
5712 if (vcpu->arch.gpa_available &&
5713 emulator_can_use_gpa(ctxt) &&
5714 (addr & ~PAGE_MASK) == (vcpu->arch.gpa_val & ~PAGE_MASK)) {
5715 gpa = vcpu->arch.gpa_val;
5716 ret = vcpu_is_mmio_gpa(vcpu, addr, gpa, write);
5717 } else {
5718 ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
5719 if (ret < 0)
5720 return X86EMUL_PROPAGATE_FAULT;
5721 }
5722
5723 if (!ret && ops->read_write_emulate(vcpu, gpa, val, bytes))
5724 return X86EMUL_CONTINUE;
5725
5726
5727
5728
5729 handled = ops->read_write_mmio(vcpu, gpa, bytes, val);
5730 if (handled == bytes)
5731 return X86EMUL_CONTINUE;
5732
5733 gpa += handled;
5734 bytes -= handled;
5735 val += handled;
5736
5737 WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
5738 frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
5739 frag->gpa = gpa;
5740 frag->data = val;
5741 frag->len = bytes;
5742 return X86EMUL_CONTINUE;
5743 }
5744
5745 static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
5746 unsigned long addr,
5747 void *val, unsigned int bytes,
5748 struct x86_exception *exception,
5749 const struct read_write_emulator_ops *ops)
5750 {
5751 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5752 gpa_t gpa;
5753 int rc;
5754
5755 if (ops->read_write_prepare &&
5756 ops->read_write_prepare(vcpu, val, bytes))
5757 return X86EMUL_CONTINUE;
5758
5759 vcpu->mmio_nr_fragments = 0;
5760
5761 /* Does the access cross a page boundary? */
5762 if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
5763 int now;
5764
5765 now = -addr & ~PAGE_MASK;
5766 rc = emulator_read_write_onepage(addr, val, now, exception,
5767 vcpu, ops);
5768
5769 if (rc != X86EMUL_CONTINUE)
5770 return rc;
5771 addr += now;
5772 if (ctxt->mode != X86EMUL_MODE_PROT64)
5773 addr = (u32)addr;
5774 val += now;
5775 bytes -= now;
5776 }
5777
5778 rc = emulator_read_write_onepage(addr, val, bytes, exception,
5779 vcpu, ops);
5780 if (rc != X86EMUL_CONTINUE)
5781 return rc;
5782
5783 if (!vcpu->mmio_nr_fragments)
5784 return rc;
5785
5786 gpa = vcpu->mmio_fragments[0].gpa;
5787
5788 vcpu->mmio_needed = 1;
5789 vcpu->mmio_cur_fragment = 0;
5790
5791 vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len);
5792 vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
5793 vcpu->run->exit_reason = KVM_EXIT_MMIO;
5794 vcpu->run->mmio.phys_addr = gpa;
5795
5796 return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
5797 }
5798
5799 static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
5800 unsigned long addr,
5801 void *val,
5802 unsigned int bytes,
5803 struct x86_exception *exception)
5804 {
5805 return emulator_read_write(ctxt, addr, val, bytes,
5806 exception, &read_emultor);
5807 }
5808
5809 static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
5810 unsigned long addr,
5811 const void *val,
5812 unsigned int bytes,
5813 struct x86_exception *exception)
5814 {
5815 return emulator_read_write(ctxt, addr, (void *)val, bytes,
5816 exception, &write_emultor);
5817 }
5818
5819 #define CMPXCHG_TYPE(t, ptr, old, new) \
5820 (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old))
5821
5822 #ifdef CONFIG_X86_64
5823 # define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new)
5824 #else
5825 # define CMPXCHG64(ptr, old, new) \
5826 (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old))
5827 #endif
5828
5829 static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
5830 unsigned long addr,
5831 const void *old,
5832 const void *new,
5833 unsigned int bytes,
5834 struct x86_exception *exception)
5835 {
5836 struct kvm_host_map map;
5837 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5838 gpa_t gpa;
5839 char *kaddr;
5840 bool exchanged;
5841
5842 /* Only power-of-two sizes up to 8 bytes can be exchanged atomically. */
5843 if (bytes > 8 || (bytes & (bytes - 1)))
5844 goto emul_write;
5845
5846 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
5847
5848 if (gpa == UNMAPPED_GVA ||
5849 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
5850 goto emul_write;
5851
5852 if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
5853 goto emul_write;
5854
5855 if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map))
5856 goto emul_write;
5857
5858 kaddr = map.hva + offset_in_page(gpa);
5859
5860 switch (bytes) {
5861 case 1:
5862 exchanged = CMPXCHG_TYPE(u8, kaddr, old, new);
5863 break;
5864 case 2:
5865 exchanged = CMPXCHG_TYPE(u16, kaddr, old, new);
5866 break;
5867 case 4:
5868 exchanged = CMPXCHG_TYPE(u32, kaddr, old, new);
5869 break;
5870 case 8:
5871 exchanged = CMPXCHG64(kaddr, old, new);
5872 break;
5873 default:
5874 BUG();
5875 }
5876
5877 kvm_vcpu_unmap(vcpu, &map, true);
5878
5879 if (!exchanged)
5880 return X86EMUL_CMPXCHG_FAILED;
5881
5882 kvm_page_track_write(vcpu, gpa, new, bytes);
5883
5884 return X86EMUL_CONTINUE;
5885
5886 emul_write:
5887 printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
5888
5889 return emulator_write_emulated(ctxt, addr, new, bytes, exception);
5890 }
5891
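/*
 * Replay the vCPU's pending PIO transaction against the in-kernel PIO bus,
 * one element at a time; returns non-zero if the bus could not handle it.
 */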
5892 static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
5893 {
5894 int r = 0, i;
5895
5896 for (i = 0; i < vcpu->arch.pio.count; i++) {
5897 if (vcpu->arch.pio.in)
5898 r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
5899 vcpu->arch.pio.size, pd);
5900 else
5901 r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
5902 vcpu->arch.pio.port, vcpu->arch.pio.size,
5903 pd);
5904 if (r)
5905 break;
5906 pd += vcpu->arch.pio.size;
5907 }
5908 return r;
5909 }
5910
5911 static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size,
5912 unsigned short port, void *val,
5913 unsigned int count, bool in)
5914 {
5915 vcpu->arch.pio.port = port;
5916 vcpu->arch.pio.in = in;
5917 vcpu->arch.pio.count = count;
5918 vcpu->arch.pio.size = size;
5919
5920 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
5921 vcpu->arch.pio.count = 0;
5922 return 1;
5923 }
5924
5925 vcpu->run->exit_reason = KVM_EXIT_IO;
5926 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
5927 vcpu->run->io.size = size;
5928 vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
5929 vcpu->run->io.count = count;
5930 vcpu->run->io.port = port;
5931
5932 return 0;
5933 }
5934
5935 static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt,
5936 int size, unsigned short port, void *val,
5937 unsigned int count)
5938 {
5939 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5940 int ret;
5941
5942 if (vcpu->arch.pio.count)
5943 goto data_avail;
5944
5945 memset(vcpu->arch.pio_data, 0, size * count);
5946
5947 ret = emulator_pio_in_out(vcpu, size, port, val, count, true);
5948 if (ret) {
5949 data_avail:
5950 memcpy(val, vcpu->arch.pio_data, size * count);
5951 trace_kvm_pio(KVM_PIO_IN, port, size, count, vcpu->arch.pio_data);
5952 vcpu->arch.pio.count = 0;
5953 return 1;
5954 }
5955
5956 return 0;
5957 }
5958
5959 static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
5960 int size, unsigned short port,
5961 const void *val, unsigned int count)
5962 {
5963 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
5964
5965 memcpy(vcpu->arch.pio_data, val, size * count);
5966 trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data);
5967 return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false);
5968 }
5969
5970 static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
5971 {
5972 return kvm_x86_ops->get_segment_base(vcpu, seg);
5973 }
5974
5975 static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
5976 {
5977 kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
5978 }
5979
5980 static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
5981 {
5982 if (!need_emulate_wbinvd(vcpu))
5983 return X86EMUL_CONTINUE;
5984
5985 if (kvm_x86_ops->has_wbinvd_exit()) {
5986 int cpu = get_cpu();
5987
5988 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
5989 smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
5990 wbinvd_ipi, NULL, 1);
5991 put_cpu();
5992 cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
5993 } else
5994 wbinvd();
5995 return X86EMUL_CONTINUE;
5996 }
5997
5998 int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
5999 {
6000 kvm_emulate_wbinvd_noskip(vcpu);
6001 return kvm_skip_emulated_instruction(vcpu);
6002 }
6003 EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
6004
6005
6006
6007 static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
6008 {
6009 kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
6010 }
6011
6012 static int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
6013 unsigned long *dest)
6014 {
6015 return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
6016 }
6017
6018 static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
6019 unsigned long value)
6020 {
6021
6022 return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
6023 }
6024
6025 static u64 mk_cr_64(u64 curr_cr, u32 new_val)
6026 {
6027 return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
6028 }
6029
6030 static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
6031 {
6032 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6033 unsigned long value;
6034
6035 switch (cr) {
6036 case 0:
6037 value = kvm_read_cr0(vcpu);
6038 break;
6039 case 2:
6040 value = vcpu->arch.cr2;
6041 break;
6042 case 3:
6043 value = kvm_read_cr3(vcpu);
6044 break;
6045 case 4:
6046 value = kvm_read_cr4(vcpu);
6047 break;
6048 case 8:
6049 value = kvm_get_cr8(vcpu);
6050 break;
6051 default:
6052 kvm_err("%s: unexpected cr %u\n", __func__, cr);
6053 return 0;
6054 }
6055
6056 return value;
6057 }
6058
6059 static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
6060 {
6061 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6062 int res = 0;
6063
6064 switch (cr) {
6065 case 0:
6066 res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
6067 break;
6068 case 2:
6069 vcpu->arch.cr2 = val;
6070 break;
6071 case 3:
6072 res = kvm_set_cr3(vcpu, val);
6073 break;
6074 case 4:
6075 res = kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
6076 break;
6077 case 8:
6078 res = kvm_set_cr8(vcpu, val);
6079 break;
6080 default:
6081 kvm_err("%s: unexpected cr %u\n", __func__, cr);
6082 res = -1;
6083 }
6084
6085 return res;
6086 }
6087
6088 static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
6089 {
6090 return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
6091 }
6092
6093 static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
6094 {
6095 kvm_x86_ops->get_gdt(emul_to_vcpu(ctxt), dt);
6096 }
6097
6098 static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
6099 {
6100 kvm_x86_ops->get_idt(emul_to_vcpu(ctxt), dt);
6101 }
6102
6103 static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
6104 {
6105 kvm_x86_ops->set_gdt(emul_to_vcpu(ctxt), dt);
6106 }
6107
6108 static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
6109 {
6110 kvm_x86_ops->set_idt(emul_to_vcpu(ctxt), dt);
6111 }
6112
6113 static unsigned long emulator_get_cached_segment_base(
6114 struct x86_emulate_ctxt *ctxt, int seg)
6115 {
6116 return get_segment_base(emul_to_vcpu(ctxt), seg);
6117 }
6118
6119 static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
6120 struct desc_struct *desc, u32 *base3,
6121 int seg)
6122 {
6123 struct kvm_segment var;
6124
6125 kvm_get_segment(emul_to_vcpu(ctxt), &var, seg);
6126 *selector = var.selector;
6127
6128 if (var.unusable) {
6129 memset(desc, 0, sizeof(*desc));
6130 if (base3)
6131 *base3 = 0;
6132 return false;
6133 }
6134
6135 if (var.g)
6136 var.limit >>= 12;
6137 set_desc_limit(desc, var.limit);
6138 set_desc_base(desc, (unsigned long)var.base);
6139 #ifdef CONFIG_X86_64
6140 if (base3)
6141 *base3 = var.base >> 32;
6142 #endif
6143 desc->type = var.type;
6144 desc->s = var.s;
6145 desc->dpl = var.dpl;
6146 desc->p = var.present;
6147 desc->avl = var.avl;
6148 desc->l = var.l;
6149 desc->d = var.db;
6150 desc->g = var.g;
6151
6152 return true;
6153 }
6154
6155 static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
6156 struct desc_struct *desc, u32 base3,
6157 int seg)
6158 {
6159 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6160 struct kvm_segment var;
6161
6162 var.selector = selector;
6163 var.base = get_desc_base(desc);
6164 #ifdef CONFIG_X86_64
6165 var.base |= ((u64)base3) << 32;
6166 #endif
6167 var.limit = get_desc_limit(desc);
6168 if (desc->g)
6169 var.limit = (var.limit << 12) | 0xfff;
6170 var.type = desc->type;
6171 var.dpl = desc->dpl;
6172 var.db = desc->d;
6173 var.s = desc->s;
6174 var.l = desc->l;
6175 var.g = desc->g;
6176 var.avl = desc->avl;
6177 var.present = desc->p;
6178 var.unusable = !var.present;
6179 var.padding = 0;
6180
6181 kvm_set_segment(vcpu, &var, seg);
6182 return;
6183 }
6184
6185 static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
6186 u32 msr_index, u64 *pdata)
6187 {
6188 return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
6189 }
6190
6191 static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
6192 u32 msr_index, u64 data)
6193 {
6194 return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
6195 }
6196
6197 static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt)
6198 {
6199 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6200
6201 return vcpu->arch.smbase;
6202 }
6203
6204 static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
6205 {
6206 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6207
6208 vcpu->arch.smbase = smbase;
6209 }
6210
6211 static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
6212 u32 pmc)
6213 {
6214 return kvm_pmu_is_valid_msr_idx(emul_to_vcpu(ctxt), pmc);
6215 }
6216
6217 static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
6218 u32 pmc, u64 *pdata)
6219 {
6220 return kvm_pmu_rdpmc(emul_to_vcpu(ctxt), pmc, pdata);
6221 }
6222
6223 static void emulator_halt(struct x86_emulate_ctxt *ctxt)
6224 {
6225 emul_to_vcpu(ctxt)->arch.halt_request = 1;
6226 }
6227
6228 static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
6229 struct x86_instruction_info *info,
6230 enum x86_intercept_stage stage)
6231 {
6232 return kvm_x86_ops->check_intercept(emul_to_vcpu(ctxt), info, stage);
6233 }
6234
6235 static bool emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
6236 u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool check_limit)
6237 {
6238 return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, check_limit);
6239 }
6240
6241 static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
6242 {
6243 return kvm_register_read(emul_to_vcpu(ctxt), reg);
6244 }
6245
6246 static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
6247 {
6248 kvm_register_write(emul_to_vcpu(ctxt), reg, val);
6249 }
6250
6251 static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
6252 {
6253 kvm_x86_ops->set_nmi_mask(emul_to_vcpu(ctxt), masked);
6254 }
6255
6256 static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
6257 {
6258 return emul_to_vcpu(ctxt)->arch.hflags;
6259 }
6260
6261 static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
6262 {
6263 emul_to_vcpu(ctxt)->arch.hflags = emul_flags;
6264 }
6265
6266 static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt,
6267 const char *smstate)
6268 {
6269 return kvm_x86_ops->pre_leave_smm(emul_to_vcpu(ctxt), smstate);
6270 }
6271
6272 static void emulator_post_leave_smm(struct x86_emulate_ctxt *ctxt)
6273 {
6274 kvm_smm_changed(emul_to_vcpu(ctxt));
6275 }
6276
6277 static int emulator_set_xcr(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr)
6278 {
6279 return __kvm_set_xcr(emul_to_vcpu(ctxt), index, xcr);
6280 }
6281
6282 static const struct x86_emulate_ops emulate_ops = {
6283 .read_gpr = emulator_read_gpr,
6284 .write_gpr = emulator_write_gpr,
6285 .read_std = emulator_read_std,
6286 .write_std = emulator_write_std,
6287 .read_phys = kvm_read_guest_phys_system,
6288 .fetch = kvm_fetch_guest_virt,
6289 .read_emulated = emulator_read_emulated,
6290 .write_emulated = emulator_write_emulated,
6291 .cmpxchg_emulated = emulator_cmpxchg_emulated,
6292 .invlpg = emulator_invlpg,
6293 .pio_in_emulated = emulator_pio_in_emulated,
6294 .pio_out_emulated = emulator_pio_out_emulated,
6295 .get_segment = emulator_get_segment,
6296 .set_segment = emulator_set_segment,
6297 .get_cached_segment_base = emulator_get_cached_segment_base,
6298 .get_gdt = emulator_get_gdt,
6299 .get_idt = emulator_get_idt,
6300 .set_gdt = emulator_set_gdt,
6301 .set_idt = emulator_set_idt,
6302 .get_cr = emulator_get_cr,
6303 .set_cr = emulator_set_cr,
6304 .cpl = emulator_get_cpl,
6305 .get_dr = emulator_get_dr,
6306 .set_dr = emulator_set_dr,
6307 .get_smbase = emulator_get_smbase,
6308 .set_smbase = emulator_set_smbase,
6309 .set_msr = emulator_set_msr,
6310 .get_msr = emulator_get_msr,
6311 .check_pmc = emulator_check_pmc,
6312 .read_pmc = emulator_read_pmc,
6313 .halt = emulator_halt,
6314 .wbinvd = emulator_wbinvd,
6315 .fix_hypercall = emulator_fix_hypercall,
6316 .intercept = emulator_intercept,
6317 .get_cpuid = emulator_get_cpuid,
6318 .set_nmi_mask = emulator_set_nmi_mask,
6319 .get_hflags = emulator_get_hflags,
6320 .set_hflags = emulator_set_hflags,
6321 .pre_leave_smm = emulator_pre_leave_smm,
6322 .post_leave_smm = emulator_post_leave_smm,
6323 .set_xcr = emulator_set_xcr,
6324 };
6325
6326 static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
6327 {
6328 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
6329
6330 /*
6331  * An "sti; sti" sequence only blocks interrupts for the first
6332  * instruction, so if the emulated instruction requested the
6333  * interrupt shadow while a shadow was already active, drop the
6334  * request instead of re-arming the shadow for another instruction.
6335  */
6336 if (int_shadow & mask)
6337 mask = 0;
6338 if (unlikely(int_shadow || mask)) {
6339 kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
6340 if (!mask)
6341 kvm_make_request(KVM_REQ_EVENT, vcpu);
6342 }
6343 }
6344
6345 static bool inject_emulated_exception(struct kvm_vcpu *vcpu)
6346 {
6347 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
6348 if (ctxt->exception.vector == PF_VECTOR)
6349 return kvm_propagate_fault(vcpu, &ctxt->exception);
6350
6351 if (ctxt->exception.error_code_valid)
6352 kvm_queue_exception_e(vcpu, ctxt->exception.vector,
6353 ctxt->exception.error_code);
6354 else
6355 kvm_queue_exception(vcpu, ctxt->exception.vector);
6356 return false;
6357 }
6358
6359 static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
6360 {
6361 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
6362 int cs_db, cs_l;
6363
6364 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
6365
6366 ctxt->eflags = kvm_get_rflags(vcpu);
6367 ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
6368
6369 ctxt->eip = kvm_rip_read(vcpu);
6370 ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
6371 (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
6372 (cs_l && is_long_mode(vcpu)) ? X86EMUL_MODE_PROT64 :
6373 cs_db ? X86EMUL_MODE_PROT32 :
6374 X86EMUL_MODE_PROT16;
6375 BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK);
6376 BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
6377 BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
6378
6379 init_decode_cache(ctxt);
6380 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
6381 }
6382
6383 void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
6384 {
6385 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
6386 int ret;
6387
6388 init_emulate_ctxt(vcpu);
6389
6390 ctxt->op_bytes = 2;
6391 ctxt->ad_bytes = 2;
6392 ctxt->_eip = ctxt->eip + inc_eip;
6393 ret = emulate_int_real(ctxt, irq);
6394
6395 if (ret != X86EMUL_CONTINUE) {
6396 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
6397 } else {
6398 ctxt->eip = ctxt->_eip;
6399 kvm_rip_write(vcpu, ctxt->eip);
6400 kvm_set_rflags(vcpu, ctxt->eflags);
6401 }
6402 }
6403 EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
6404
6405 static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
6406 {
6407 ++vcpu->stat.insn_emulation_fail;
6408 trace_kvm_emulate_insn_failed(vcpu);
6409
6410 if (emulation_type & EMULTYPE_VMWARE_GP) {
6411 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
6412 return 1;
6413 }
6414
6415 if (emulation_type & EMULTYPE_SKIP) {
6416 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
6417 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
6418 vcpu->run->internal.ndata = 0;
6419 return 0;
6420 }
6421
6422 kvm_queue_exception(vcpu, UD_VECTOR);
6423
6424 if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
6425 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
6426 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
6427 vcpu->run->internal.ndata = 0;
6428 return 0;
6429 }
6430
6431 return 1;
6432 }
6433
6434 static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
6435 bool write_fault_to_shadow_pgtable,
6436 int emulation_type)
6437 {
6438 gpa_t gpa = cr2_or_gpa;
6439 kvm_pfn_t pfn;
6440
6441 if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
6442 return false;
6443
6444 if (WARN_ON_ONCE(is_guest_mode(vcpu)))
6445 return false;
6446
6447 if (!vcpu->arch.mmu->direct_map) {
6448 /*
6449  * Write permission should be allowed since only
6450  * write access needs to be emulated.
6451  */
6452 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL);
6453
6454 /*
6455  * If the mapping is invalid in the guest, let the CPU retry
6456  * it to generate the fault.
6457  */
6458 if (gpa == UNMAPPED_GVA)
6459 return true;
6460 }
6461
6462 /*
6463  * Do not retry the unhandleable instruction if it faults on
6464  * read-only host memory, otherwise it will go into an infinite
6465  * loop: retry instruction -> write #PF -> emulation fail ->
6466  * retry instruction -> ...
6467  */
6468 pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
6469
6470 /*
6471  * If the instruction failed on an error pfn, it cannot be fixed;
6472  * report the error to userspace.
6473  */
6474 if (is_error_noslot_pfn(pfn))
6475 return false;
6476
6477 kvm_release_pfn_clean(pfn);
6478
6479 /* The instructions are well-emulated on direct_map pages. */
6480 if (vcpu->arch.mmu->direct_map) {
6481 unsigned int indirect_shadow_pages;
6482
6483 spin_lock(&vcpu->kvm->mmu_lock);
6484 indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
6485 spin_unlock(&vcpu->kvm->mmu_lock);
6486
6487 if (indirect_shadow_pages)
6488 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
6489
6490 return true;
6491 }
6492
6493 /*
6494  * If emulation was due to access to a shadowed page table
6495  * and it failed, try to unshadow the page and re-enter the
6496  * guest to let the CPU execute the instruction again.
6497  */
6498 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
6499
6500 /*
6501  * If the access faults on its own page table, it cannot
6502  * be fixed by unprotecting the shadow page, and it should
6503  * be reported to userspace.
6504  */
6505 return !write_fault_to_shadow_pgtable;
6506 }
6507
6508 static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
6509 gpa_t cr2_or_gpa, int emulation_type)
6510 {
6511 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
6512 unsigned long last_retry_eip, last_retry_addr, gpa = cr2_or_gpa;
6513
6514 last_retry_eip = vcpu->arch.last_retry_eip;
6515 last_retry_addr = vcpu->arch.last_retry_addr;
6516
6517
6518
6519
6520
6521
6522
6523
6524
6525
6526
6527
6528
6529
6530 vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
6531
6532 if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
6533 return false;
6534
6535 if (WARN_ON_ONCE(is_guest_mode(vcpu)))
6536 return false;
6537
6538 if (x86_page_table_writing_insn(ctxt))
6539 return false;
6540
6541 if (ctxt->eip == last_retry_eip && last_retry_addr == cr2_or_gpa)
6542 return false;
6543
6544 vcpu->arch.last_retry_eip = ctxt->eip;
6545 vcpu->arch.last_retry_addr = cr2_or_gpa;
6546
6547 if (!vcpu->arch.mmu->direct_map)
6548 gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL);
6549
6550 kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
6551
6552 return true;
6553 }
6554
6555 static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
6556 static int complete_emulated_pio(struct kvm_vcpu *vcpu);
6557
6558 static void kvm_smm_changed(struct kvm_vcpu *vcpu)
6559 {
6560 if (!(vcpu->arch.hflags & HF_SMM_MASK)) {
6561 /* This is a good place to trace that we are exiting SMM.  */
6562 trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, false);
6563
6564 /* Process a latched INIT or SMI, if any.  */
6565 kvm_make_request(KVM_REQ_EVENT, vcpu);
6566 }
6567
6568 kvm_mmu_reset_context(vcpu);
6569 }
6570
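/* Compute the DR6 trap bits for enabled breakpoints in db[] that match addr and access type under dr7. */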
6571 static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
6572 unsigned long *db)
6573 {
6574 u32 dr6 = 0;
6575 int i;
6576 u32 enable, rwlen;
6577
6578 enable = dr7;
6579 rwlen = dr7 >> 16;
6580 for (i = 0; i < 4; i++, enable >>= 2, rwlen >>= 4)
6581 if ((enable & 3) && (rwlen & 15) == type && db[i] == addr)
6582 dr6 |= (1 << i);
6583 return dr6;
6584 }
6585
6586 static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu)
6587 {
6588 struct kvm_run *kvm_run = vcpu->run;
6589
6590 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
6591 kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
6592 kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
6593 kvm_run->debug.arch.exception = DB_VECTOR;
6594 kvm_run->exit_reason = KVM_EXIT_DEBUG;
6595 return 0;
6596 }
6597 kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
6598 return 1;
6599 }
6600
6601 int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
6602 {
6603 unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
6604 int r;
6605
6606 r = kvm_x86_ops->skip_emulated_instruction(vcpu);
6607 if (unlikely(!r))
6608 return 0;
6609
6610 /*
6611  * rflags is the old, "raw" value of the flags.  The new value has
6612  * not been saved yet.
6613  *
6614  * This is correct even for TF set by the guest, because "the
6615  * processor will not generate this exception after the instruction
6616  * that sets the TF flag".
6617  */
6618 if (unlikely(rflags & X86_EFLAGS_TF))
6619 r = kvm_vcpu_do_singlestep(vcpu);
6620 return r;
6621 }
6622 EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
6623
6624 static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
6625 {
6626 if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
6627 (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
6628 struct kvm_run *kvm_run = vcpu->run;
6629 unsigned long eip = kvm_get_linear_rip(vcpu);
6630 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
6631 vcpu->arch.guest_debug_dr7,
6632 vcpu->arch.eff_db);
6633
6634 if (dr6 != 0) {
6635 kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM;
6636 kvm_run->debug.arch.pc = eip;
6637 kvm_run->debug.arch.exception = DB_VECTOR;
6638 kvm_run->exit_reason = KVM_EXIT_DEBUG;
6639 *r = 0;
6640 return true;
6641 }
6642 }
6643
6644 if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) &&
6645 !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) {
6646 unsigned long eip = kvm_get_linear_rip(vcpu);
6647 u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
6648 vcpu->arch.dr7,
6649 vcpu->arch.db);
6650
6651 if (dr6 != 0) {
6652 vcpu->arch.dr6 &= ~DR_TRAP_BITS;
6653 vcpu->arch.dr6 |= dr6 | DR6_RTM;
6654 kvm_queue_exception(vcpu, DB_VECTOR);
6655 *r = 1;
6656 return true;
6657 }
6658 }
6659
6660 return false;
6661 }
6662
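/* VMware backdoor opcodes: the one-byte IN/OUT/INS/OUTS port-I/O instructions and two-byte RDPMC (0F 33). */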
6663 static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt)
6664 {
6665 switch (ctxt->opcode_len) {
6666 case 1:
6667 switch (ctxt->b) {
6668 case 0xe4:
6669 case 0xe5:
6670 case 0xec:
6671 case 0xed:
6672 case 0xe6:
6673 case 0xe7:
6674 case 0xee:
6675 case 0xef:
6676 case 0x6c:
6677 case 0x6d:
6678 case 0x6e:
6679 case 0x6f:
6680 return true;
6681 }
6682 break;
6683 case 2:
6684 switch (ctxt->b) {
6685 case 0x33:
6686 return true;
6687 }
6688 break;
6689 }
6690
6691 return false;
6692 }
6693
6694 int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
6695 int emulation_type, void *insn, int insn_len)
6696 {
6697 int r;
6698 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
6699 bool writeback = true;
6700 bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
6701
6702 vcpu->arch.l1tf_flush_l1d = true;
6703
6704
6705
6706
6707
6708 vcpu->arch.write_fault_to_shadow_pgtable = false;
6709 kvm_clear_exception_queue(vcpu);
6710
6711 if (!(emulation_type & EMULTYPE_NO_DECODE)) {
6712 init_emulate_ctxt(vcpu);
6713
6714
6715
6716
6717
6718
6719
6720 if (!(emulation_type & EMULTYPE_SKIP) &&
6721 kvm_vcpu_check_breakpoint(vcpu, &r))
6722 return r;
6723
6724 ctxt->interruptibility = 0;
6725 ctxt->have_exception = false;
6726 ctxt->exception.vector = -1;
6727 ctxt->perm_ok = false;
6728
6729 ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
6730
6731 r = x86_decode_insn(ctxt, insn, insn_len);
6732
6733 trace_kvm_emulate_insn_start(vcpu);
6734 ++vcpu->stat.insn_emulation;
6735 if (r != EMULATION_OK) {
6736 if ((emulation_type & EMULTYPE_TRAP_UD) ||
6737 (emulation_type & EMULTYPE_TRAP_UD_FORCED)) {
6738 kvm_queue_exception(vcpu, UD_VECTOR);
6739 return 1;
6740 }
6741 if (reexecute_instruction(vcpu, cr2_or_gpa,
6742 write_fault_to_spt,
6743 emulation_type))
6744 return 1;
6745 if (ctxt->have_exception) {
6746
6747
6748
6749
6750 WARN_ON_ONCE(ctxt->exception.vector == UD_VECTOR ||
6751 exception_type(ctxt->exception.vector) == EXCPT_TRAP);
6752 inject_emulated_exception(vcpu);
6753 return 1;
6754 }
6755 return handle_emulation_failure(vcpu, emulation_type);
6756 }
6757 }
6758
6759 if ((emulation_type & EMULTYPE_VMWARE_GP) &&
6760 !is_vmware_backdoor_opcode(ctxt)) {
6761 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
6762 return 1;
6763 }
6764
6765
6766
6767
6768
6769
6770 if (emulation_type & EMULTYPE_SKIP) {
6771 kvm_rip_write(vcpu, ctxt->_eip);
6772 if (ctxt->eflags & X86_EFLAGS_RF)
6773 kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
6774 return 1;
6775 }
6776
6777 if (retry_instruction(ctxt, cr2_or_gpa, emulation_type))
6778 return 1;
6779
6780
6781
6782 if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
6783 vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
6784 emulator_invalidate_register_cache(ctxt);
6785 }
6786
6787 restart:
6788
6789 ctxt->exception.address = cr2_or_gpa;
6790
6791 r = x86_emulate_insn(ctxt);
6792
6793 if (r == EMULATION_INTERCEPTED)
6794 return 1;
6795
6796 if (r == EMULATION_FAILED) {
6797 if (reexecute_instruction(vcpu, cr2_or_gpa, write_fault_to_spt,
6798 emulation_type))
6799 return 1;
6800
6801 return handle_emulation_failure(vcpu, emulation_type);
6802 }
6803
6804 if (ctxt->have_exception) {
6805 r = 1;
6806 if (inject_emulated_exception(vcpu))
6807 return r;
6808 } else if (vcpu->arch.pio.count) {
6809 if (!vcpu->arch.pio.in) {
6810
6811 vcpu->arch.pio.count = 0;
6812 } else {
6813 writeback = false;
6814 vcpu->arch.complete_userspace_io = complete_emulated_pio;
6815 }
6816 r = 0;
6817 } else if (vcpu->mmio_needed) {
6818 ++vcpu->stat.mmio_exits;
6819
6820 if (!vcpu->mmio_is_write)
6821 writeback = false;
6822 r = 0;
6823 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
6824 } else if (r == EMULATION_RESTART)
6825 goto restart;
6826 else
6827 r = 1;
6828
6829 if (writeback) {
6830 unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
6831 toggle_interruptibility(vcpu, ctxt->interruptibility);
6832 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
6833 if (!ctxt->have_exception ||
6834 exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
6835 kvm_rip_write(vcpu, ctxt->eip);
6836 if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
6837 r = kvm_vcpu_do_singlestep(vcpu);
6838 __kvm_set_rflags(vcpu, ctxt->eflags);
6839 }
6840
6841
6842
6843
6844
6845
6846
6847 if (unlikely((ctxt->eflags & ~rflags) & X86_EFLAGS_IF))
6848 kvm_make_request(KVM_REQ_EVENT, vcpu);
6849 } else
6850 vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
6851
6852 return r;
6853 }
6854
6855 int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type)
6856 {
6857 return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
6858 }
6859 EXPORT_SYMBOL_GPL(kvm_emulate_instruction);
6860
6861 int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
6862 void *insn, int insn_len)
6863 {
6864 return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len);
6865 }
6866 EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
6867
6868 static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu)
6869 {
6870 vcpu->arch.pio.count = 0;
6871 return 1;
6872 }
6873
6874 static int complete_fast_pio_out(struct kvm_vcpu *vcpu)
6875 {
6876 vcpu->arch.pio.count = 0;
6877
6878 if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip)))
6879 return 1;
6880
6881 return kvm_skip_emulated_instruction(vcpu);
6882 }
6883
6884 static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
6885 unsigned short port)
6886 {
6887 unsigned long val = kvm_rax_read(vcpu);
6888 int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
6889 size, port, &val, 1);
6890 if (ret)
6891 return ret;
6892
6893 /*
6894  * Workaround userspace that relies on the old KVM behavior of %rip
6895  * being incremented prior to exiting to userspace to handle "OUT 0x7e".
6896  */
6897 if (port == 0x7e &&
6898 kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_OUT_7E_INC_RIP)) {
6899 vcpu->arch.complete_userspace_io =
6900 complete_fast_pio_out_port_0x7e;
6901 kvm_skip_emulated_instruction(vcpu);
6902 } else {
6903 vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
6904 vcpu->arch.complete_userspace_io = complete_fast_pio_out;
6905 }
6906 return 0;
6907 }
6908
6909 static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
6910 {
6911 unsigned long val;
6912
6913 /* We should only ever be called with arch.pio.count equal to 1 */
6914 BUG_ON(vcpu->arch.pio.count != 1);
6915
6916 if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip))) {
6917 vcpu->arch.pio.count = 0;
6918 return 1;
6919 }
6920
6921 /* For size less than 4 we merge, else we zero extend */
6922 val = (vcpu->arch.pio.size < 4) ? kvm_rax_read(vcpu) : 0;
6923
6924 /*
6925  * Since vcpu->arch.pio.count == 1, let emulator_pio_in_emulated
6926  * perform the copy and tracing.
6927  */
6928 emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, vcpu->arch.pio.size,
6929 vcpu->arch.pio.port, &val, 1);
6930 kvm_rax_write(vcpu, val);
6931
6932 return kvm_skip_emulated_instruction(vcpu);
6933 }
6934
6935 static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size,
6936 unsigned short port)
6937 {
6938 unsigned long val;
6939 int ret;
6940
6941 /* For size less than 4 we merge, else we zero extend */
6942 val = (size < 4) ? kvm_rax_read(vcpu) : 0;
6943
6944 ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size, port,
6945 &val, 1);
6946 if (ret) {
6947 kvm_rax_write(vcpu, val);
6948 return ret;
6949 }
6950
6951 vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
6952 vcpu->arch.complete_userspace_io = complete_fast_pio_in;
6953
6954 return 0;
6955 }
6956
6957 int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in)
6958 {
6959 int ret;
6960
6961 if (in)
6962 ret = kvm_fast_pio_in(vcpu, size, port);
6963 else
6964 ret = kvm_fast_pio_out(vcpu, size, port);
6965 return ret && kvm_skip_emulated_instruction(vcpu);
6966 }
6967 EXPORT_SYMBOL_GPL(kvm_fast_pio);
6968
6969 static int kvmclock_cpu_down_prep(unsigned int cpu)
6970 {
6971 __this_cpu_write(cpu_tsc_khz, 0);
6972 return 0;
6973 }
6974
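/* Refresh this CPU's cached TSC frequency (cpu_tsc_khz), e.g. after a cpufreq transition. */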
6975 static void tsc_khz_changed(void *data)
6976 {
6977 struct cpufreq_freqs *freq = data;
6978 unsigned long khz = 0;
6979
6980 if (data)
6981 khz = freq->new;
6982 else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
6983 khz = cpufreq_quick_get(raw_smp_processor_id());
6984 if (!khz)
6985 khz = tsc_khz;
6986 __this_cpu_write(cpu_tsc_khz, khz);
6987 }
6988
6989 #ifdef CONFIG_X86_64
6990 static void kvm_hyperv_tsc_notifier(void)
6991 {
6992 struct kvm *kvm;
6993 struct kvm_vcpu *vcpu;
6994 int cpu;
6995
6996 mutex_lock(&kvm_lock);
6997 list_for_each_entry(kvm, &vm_list, vm_list)
6998 kvm_make_mclock_inprogress_request(kvm);
6999
7000 hyperv_stop_tsc_emulation();
7001
7002 /* TSC frequency always matches when on Hyper-V */
7003 for_each_present_cpu(cpu)
7004 per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
7005 kvm_max_guest_tsc_khz = tsc_khz;
7006
7007 list_for_each_entry(kvm, &vm_list, vm_list) {
7008 struct kvm_arch *ka = &kvm->arch;
7009
7010 spin_lock(&ka->pvclock_gtod_sync_lock);
7011
7012 pvclock_update_vm_gtod_copy(kvm);
7013
7014 kvm_for_each_vcpu(cpu, vcpu, kvm)
7015 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
7016
7017 kvm_for_each_vcpu(cpu, vcpu, kvm)
7018 kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
7019
7020 spin_unlock(&ka->pvclock_gtod_sync_lock);
7021 }
7022 mutex_unlock(&kvm_lock);
7023 }
7024 #endif
7025
7026 static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu)
7027 {
7028 struct kvm *kvm;
7029 struct kvm_vcpu *vcpu;
7030 int i, send_ipi = 0;
7031
7032
7033
7034
7035
7036
7037
7038
7039
7040
7041
7042
7043
7044
7045
7046
7047
7048
7049
7050
7051
7052
7053
7054
7055
7056
7057
7058
7059
7060
7061
7062
7063
7064
7065
7066
7067
7068
7069
7070
7071 smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
7072
7073 mutex_lock(&kvm_lock);
7074 list_for_each_entry(kvm, &vm_list, vm_list) {
7075 kvm_for_each_vcpu(i, vcpu, kvm) {
7076 if (vcpu->cpu != cpu)
7077 continue;
7078 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
7079 if (vcpu->cpu != raw_smp_processor_id())
7080 send_ipi = 1;
7081 }
7082 }
7083 mutex_unlock(&kvm_lock);
7084
7085 if (freq->old < freq->new && send_ipi) {
7086
7087
7088
7089
7090
7091
7092
7093
7094
7095
7096
7097
7098 smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
7099 }
7100 }
7101
7102 static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
7103 void *data)
7104 {
7105 struct cpufreq_freqs *freq = data;
7106 int cpu;
7107
7108 if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
7109 return 0;
7110 if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
7111 return 0;
7112
7113 for_each_cpu(cpu, freq->policy->cpus)
7114 __kvmclock_cpufreq_notifier(freq, cpu);
7115
7116 return 0;
7117 }
7118
7119 static struct notifier_block kvmclock_cpufreq_notifier_block = {
7120 .notifier_call = kvmclock_cpufreq_notifier
7121 };
7122
7123 static int kvmclock_cpu_online(unsigned int cpu)
7124 {
7125 tsc_khz_changed(NULL);
7126 return 0;
7127 }
7128
7129 static void kvm_timer_init(void)
7130 {
7131 max_tsc_khz = tsc_khz;
7132
7133 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
7134 #ifdef CONFIG_CPU_FREQ
7135 struct cpufreq_policy policy;
7136 int cpu;
7137
7138 memset(&policy, 0, sizeof(policy));
7139 cpu = get_cpu();
7140 cpufreq_get_policy(&policy, cpu);
7141 if (policy.cpuinfo.max_freq)
7142 max_tsc_khz = policy.cpuinfo.max_freq;
7143 put_cpu();
7144 #endif
7145 cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
7146 CPUFREQ_TRANSITION_NOTIFIER);
7147 }
7148
7149 cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
7150 kvmclock_cpu_online, kvmclock_cpu_down_prep);
7151 }
7152
7153 DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
7154 EXPORT_PER_CPU_SYMBOL_GPL(current_vcpu);
7155
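/* perf guest callbacks: report guest state based on the per-CPU current_vcpu pointer. */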
7156 int kvm_is_in_guest(void)
7157 {
7158 return __this_cpu_read(current_vcpu) != NULL;
7159 }
7160
7161 static int kvm_is_user_mode(void)
7162 {
7163 int user_mode = 3;
7164
7165 if (__this_cpu_read(current_vcpu))
7166 user_mode = kvm_x86_ops->get_cpl(__this_cpu_read(current_vcpu));
7167
7168 return user_mode != 0;
7169 }
7170
7171 static unsigned long kvm_get_guest_ip(void)
7172 {
7173 unsigned long ip = 0;
7174
7175 if (__this_cpu_read(current_vcpu))
7176 ip = kvm_rip_read(__this_cpu_read(current_vcpu));
7177
7178 return ip;
7179 }
7180
7181 static void kvm_handle_intel_pt_intr(void)
7182 {
7183 struct kvm_vcpu *vcpu = __this_cpu_read(current_vcpu);
7184
7185 kvm_make_request(KVM_REQ_PMI, vcpu);
7186 __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
7187 (unsigned long *)&vcpu->arch.pmu.global_status);
7188 }
7189
7190 static struct perf_guest_info_callbacks kvm_guest_cbs = {
7191 .is_in_guest = kvm_is_in_guest,
7192 .is_user_mode = kvm_is_user_mode,
7193 .get_guest_ip = kvm_get_guest_ip,
7194 .handle_intel_pt_intr = kvm_handle_intel_pt_intr,
7195 };
7196
7197 #ifdef CONFIG_X86_64
7198 static void pvclock_gtod_update_fn(struct work_struct *work)
7199 {
7200 struct kvm *kvm;
7201
7202 struct kvm_vcpu *vcpu;
7203 int i;
7204
7205 mutex_lock(&kvm_lock);
7206 list_for_each_entry(kvm, &vm_list, vm_list)
7207 kvm_for_each_vcpu(i, vcpu, kvm)
7208 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
7209 atomic_set(&kvm_guest_has_master_clock, 0);
7210 mutex_unlock(&kvm_lock);
7211 }
7212
7213 static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
7214
7215
7216
7217
7218 static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
7219 void *priv)
7220 {
7221 struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
7222 struct timekeeper *tk = priv;
7223
7224 update_pvclock_gtod(tk);
7225
7226
7227
7228
7229 if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
7230 atomic_read(&kvm_guest_has_master_clock) != 0)
7231 queue_work(system_long_wq, &pvclock_gtod_work);
7232
7233 return 0;
7234 }
7235
7236 static struct notifier_block pvclock_gtod_notifier = {
7237 .notifier_call = pvclock_gtod_notify,
7238 };
7239 #endif
7240
7241 int kvm_arch_init(void *opaque)
7242 {
7243 int r;
7244 struct kvm_x86_ops *ops = opaque;
7245
7246 if (kvm_x86_ops) {
7247 printk(KERN_ERR "kvm: already loaded the other module\n");
7248 r = -EEXIST;
7249 goto out;
7250 }
7251
7252 if (!ops->cpu_has_kvm_support()) {
7253 printk(KERN_ERR "kvm: no hardware support\n");
7254 r = -EOPNOTSUPP;
7255 goto out;
7256 }
7257 if (ops->disabled_by_bios()) {
7258 printk(KERN_ERR "kvm: disabled by bios\n");
7259 r = -EOPNOTSUPP;
7260 goto out;
7261 }
7262
7263 /*
7264  * KVM explicitly assumes that the guest has an FPU and
7265  * FXSAVE/FXRSTOR.  For example, the KVM_GET_FPU ioctl explicitly
7266  * casts the vCPU's FPU state as an fxregs_state struct.
7267  */
7268 if (!boot_cpu_has(X86_FEATURE_FPU) || !boot_cpu_has(X86_FEATURE_FXSR)) {
7269 printk(KERN_ERR "kvm: inadequate fpu\n");
7270 r = -EOPNOTSUPP;
7271 goto out;
7272 }
7273
7274 r = -ENOMEM;
7275 x86_fpu_cache = kmem_cache_create("x86_fpu", sizeof(struct fpu),
7276 __alignof__(struct fpu), SLAB_ACCOUNT,
7277 NULL);
7278 if (!x86_fpu_cache) {
7279 printk(KERN_ERR "kvm: failed to allocate cache for x86 fpu\n");
7280 goto out;
7281 }
7282
7283 shared_msrs = alloc_percpu(struct kvm_shared_msrs);
7284 if (!shared_msrs) {
7285 printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n");
7286 goto out_free_x86_fpu_cache;
7287 }
7288
7289 r = kvm_mmu_module_init();
7290 if (r)
7291 goto out_free_percpu;
7292
7293 kvm_x86_ops = ops;
7294
7295 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
7296 PT_DIRTY_MASK, PT64_NX_MASK, 0,
7297 PT_PRESENT_MASK, 0, sme_me_mask);
7298 kvm_timer_init();
7299
7300 perf_register_guest_info_callbacks(&kvm_guest_cbs);
7301
7302 if (boot_cpu_has(X86_FEATURE_XSAVE))
7303 host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
7304
7305 kvm_lapic_init();
7306 if (pi_inject_timer == -1)
7307 pi_inject_timer = housekeeping_enabled(HK_FLAG_TIMER);
7308 #ifdef CONFIG_X86_64
7309 pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
7310
7311 if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
7312 set_hv_tscchange_cb(kvm_hyperv_tsc_notifier);
7313 #endif
7314
7315 return 0;
7316
7317 out_free_percpu:
7318 free_percpu(shared_msrs);
7319 out_free_x86_fpu_cache:
7320 kmem_cache_destroy(x86_fpu_cache);
7321 out:
7322 return r;
7323 }
7324
7325 void kvm_arch_exit(void)
7326 {
7327 #ifdef CONFIG_X86_64
7328 if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
7329 clear_hv_tscchange_cb();
7330 #endif
7331 kvm_lapic_exit();
7332 perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
7333
7334 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
7335 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
7336 CPUFREQ_TRANSITION_NOTIFIER);
7337 cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
7338 #ifdef CONFIG_X86_64
7339 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
7340 #endif
7341 kvm_x86_ops = NULL;
7342 kvm_mmu_module_exit();
7343 free_percpu(shared_msrs);
7344 kmem_cache_destroy(x86_fpu_cache);
7345 }
7346
7347 int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
7348 {
7349 ++vcpu->stat.halt_exits;
7350 if (lapic_in_kernel(vcpu)) {
7351 vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
7352 return 1;
7353 } else {
7354 vcpu->run->exit_reason = KVM_EXIT_HLT;
7355 return 0;
7356 }
7357 }
7358 EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
7359
7360 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
7361 {
7362 int ret = kvm_skip_emulated_instruction(vcpu);
7363
7364 /*
7365  * TODO: we might be squashing a GUESTDBG_SINGLESTEP-triggered KVM_EXIT_DEBUG here.
7366  */
7367 return kvm_vcpu_halt(vcpu) && ret;
7368 }
7369 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
7370
7371 #ifdef CONFIG_X86_64
7372 static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
7373 unsigned long clock_type)
7374 {
7375 struct kvm_clock_pairing clock_pairing;
7376 struct timespec64 ts;
7377 u64 cycle;
7378 int ret;
7379
7380 if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK)
7381 return -KVM_EOPNOTSUPP;
7382
7383 if (kvm_get_walltime_and_clockread(&ts, &cycle) == false)
7384 return -KVM_EOPNOTSUPP;
7385
7386 clock_pairing.sec = ts.tv_sec;
7387 clock_pairing.nsec = ts.tv_nsec;
7388 clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle);
7389 clock_pairing.flags = 0;
7390 memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad));
7391
7392 ret = 0;
7393 if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing,
7394 sizeof(struct kvm_clock_pairing)))
7395 ret = -KVM_EFAULT;
7396
7397 return ret;
7398 }
7399 #endif
7400
7401 /*
7402  * kvm_pv_kick_cpu_op:  Kick a vcpu.
7403  *
7404  * @apicid - apicid of the vcpu to be kicked.
7405  */
7406 static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
7407 {
7408 struct kvm_lapic_irq lapic_irq;
7409
7410 lapic_irq.shorthand = 0;
7411 lapic_irq.dest_mode = 0;
7412 lapic_irq.level = 0;
7413 lapic_irq.dest_id = apicid;
7414 lapic_irq.msi_redir_hint = false;
7415
7416 lapic_irq.delivery_mode = APIC_DM_REMRD;
7417 kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
7418 }
7419
7420 void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
7421 {
7422 if (!lapic_in_kernel(vcpu)) {
7423 WARN_ON_ONCE(vcpu->arch.apicv_active);
7424 return;
7425 }
7426 if (!vcpu->arch.apicv_active)
7427 return;
7428
7429 vcpu->arch.apicv_active = false;
7430 kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
7431 }
7432
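/* Directed yield to the vCPU whose APIC ID is dest_id, if that vCPU is marked ready to run. */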
7433 static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id)
7434 {
7435 struct kvm_vcpu *target = NULL;
7436 struct kvm_apic_map *map;
7437
7438 rcu_read_lock();
7439 map = rcu_dereference(kvm->arch.apic_map);
7440
7441 if (likely(map) && dest_id <= map->max_apic_id && map->phys_map[dest_id])
7442 target = map->phys_map[dest_id]->vcpu;
7443
7444 rcu_read_unlock();
7445
7446 if (target && READ_ONCE(target->ready))
7447 kvm_vcpu_yield_to(target);
7448 }
7449
7450 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
7451 {
7452 unsigned long nr, a0, a1, a2, a3, ret;
7453 int op_64_bit;
7454
7455 if (kvm_hv_hypercall_enabled(vcpu->kvm))
7456 return kvm_hv_hypercall(vcpu);
7457
7458 nr = kvm_rax_read(vcpu);
7459 a0 = kvm_rbx_read(vcpu);
7460 a1 = kvm_rcx_read(vcpu);
7461 a2 = kvm_rdx_read(vcpu);
7462 a3 = kvm_rsi_read(vcpu);
7463
7464 trace_kvm_hypercall(nr, a0, a1, a2, a3);
7465
7466 op_64_bit = is_64_bit_mode(vcpu);
7467 if (!op_64_bit) {
7468 nr &= 0xFFFFFFFF;
7469 a0 &= 0xFFFFFFFF;
7470 a1 &= 0xFFFFFFFF;
7471 a2 &= 0xFFFFFFFF;
7472 a3 &= 0xFFFFFFFF;
7473 }
7474
7475 if (kvm_x86_ops->get_cpl(vcpu) != 0) {
7476 ret = -KVM_EPERM;
7477 goto out;
7478 }
7479
7480 switch (nr) {
7481 case KVM_HC_VAPIC_POLL_IRQ:
7482 ret = 0;
7483 break;
7484 case KVM_HC_KICK_CPU:
7485 kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
7486 kvm_sched_yield(vcpu->kvm, a1);
7487 ret = 0;
7488 break;
7489 #ifdef CONFIG_X86_64
7490 case KVM_HC_CLOCK_PAIRING:
7491 ret = kvm_pv_clock_pairing(vcpu, a0, a1);
7492 break;
7493 #endif
7494 case KVM_HC_SEND_IPI:
7495 ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
7496 break;
7497 case KVM_HC_SCHED_YIELD:
7498 kvm_sched_yield(vcpu->kvm, a0);
7499 ret = 0;
7500 break;
7501 default:
7502 ret = -KVM_ENOSYS;
7503 break;
7504 }
7505 out:
7506 if (!op_64_bit)
7507 ret = (u32)ret;
7508 kvm_rax_write(vcpu, ret);
7509
7510 ++vcpu->stat.hypercalls;
7511 return kvm_skip_emulated_instruction(vcpu);
7512 }
7513 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
7514
7515 static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
7516 {
7517 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
7518 char instruction[3];
7519 unsigned long rip = kvm_rip_read(vcpu);
7520
7521 kvm_x86_ops->patch_hypercall(vcpu, instruction);
7522
7523 return emulator_write_emulated(ctxt, rip, instruction, 3,
7524 &ctxt->exception);
7525 }
7526
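/* True when userspace asked for interrupt-window exits and interrupts are injected by userspace (no in-kernel PIC). */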
7527 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
7528 {
7529 return vcpu->run->request_interrupt_window &&
7530 likely(!pic_in_kernel(vcpu->kvm));
7531 }
7532
7533 static void post_kvm_run_save(struct kvm_vcpu *vcpu)
7534 {
7535 struct kvm_run *kvm_run = vcpu->run;
7536
7537 kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
7538 kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
7539 kvm_run->cr8 = kvm_get_cr8(vcpu);
7540 kvm_run->apic_base = kvm_get_apic_base(vcpu);
7541 kvm_run->ready_for_interrupt_injection =
7542 pic_in_kernel(vcpu->kvm) ||
7543 kvm_vcpu_ready_for_interrupt_injection(vcpu);
7544 }
7545
7546 static void update_cr8_intercept(struct kvm_vcpu *vcpu)
7547 {
7548 int max_irr, tpr;
7549
7550 if (!kvm_x86_ops->update_cr8_intercept)
7551 return;
7552
7553 if (!lapic_in_kernel(vcpu))
7554 return;
7555
7556 if (vcpu->arch.apicv_active)
7557 return;
7558
7559 if (!vcpu->arch.apic->vapic_addr)
7560 max_irr = kvm_lapic_find_highest_irr(vcpu);
7561 else
7562 max_irr = -1;
7563
7564 if (max_irr != -1)
7565 max_irr >>= 4;
7566
7567 tpr = kvm_lapic_get_cr8(vcpu);
7568
7569 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
7570 }
7571
7572 static int inject_pending_event(struct kvm_vcpu *vcpu)
7573 {
7574 int r;
7575
7576
7577
7578 if (vcpu->arch.exception.injected)
7579 kvm_x86_ops->queue_exception(vcpu);
7580
7581
7582
7583
7584
7585
7586
7587
7588
7589
7590
7591
7592
7593
7594 else if (!vcpu->arch.exception.pending) {
7595 if (vcpu->arch.nmi_injected)
7596 kvm_x86_ops->set_nmi(vcpu);
7597 else if (vcpu->arch.interrupt.injected)
7598 kvm_x86_ops->set_irq(vcpu);
7599 }
7600
7601
7602
7603
7604
7605
7606
7607 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
7608 r = kvm_x86_ops->check_nested_events(vcpu);
7609 if (r != 0)
7610 return r;
7611 }
7612
7613
7614 if (vcpu->arch.exception.pending) {
7615 trace_kvm_inj_exception(vcpu->arch.exception.nr,
7616 vcpu->arch.exception.has_error_code,
7617 vcpu->arch.exception.error_code);
7618
7619 WARN_ON_ONCE(vcpu->arch.exception.injected);
7620 vcpu->arch.exception.pending = false;
7621 vcpu->arch.exception.injected = true;
7622
7623 if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
7624 __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
7625 X86_EFLAGS_RF);
7626
7627 if (vcpu->arch.exception.nr == DB_VECTOR) {
7628
7629
7630
7631
7632
7633
7634
7635
7636
7637
7638 kvm_deliver_exception_payload(vcpu);
7639 if (vcpu->arch.dr7 & DR7_GD) {
7640 vcpu->arch.dr7 &= ~DR7_GD;
7641 kvm_update_dr7(vcpu);
7642 }
7643 }
7644
7645 kvm_x86_ops->queue_exception(vcpu);
7646 }
7647
7648
7649 if (kvm_event_needs_reinjection(vcpu))
7650 return 0;
7651
7652 if (vcpu->arch.smi_pending && !is_smm(vcpu) &&
7653 kvm_x86_ops->smi_allowed(vcpu)) {
7654 vcpu->arch.smi_pending = false;
7655 ++vcpu->arch.smi_count;
7656 enter_smm(vcpu);
7657 } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
7658 --vcpu->arch.nmi_pending;
7659 vcpu->arch.nmi_injected = true;
7660 kvm_x86_ops->set_nmi(vcpu);
7661 } else if (kvm_cpu_has_injectable_intr(vcpu)) {
7662
7663
7664
7665
7666
7667
7668
7669 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
7670 r = kvm_x86_ops->check_nested_events(vcpu);
7671 if (r != 0)
7672 return r;
7673 }
7674 if (kvm_x86_ops->interrupt_allowed(vcpu)) {
7675 kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
7676 false);
7677 kvm_x86_ops->set_irq(vcpu);
7678 }
7679 }
7680
7681 return 0;
7682 }
7683
7684 static void process_nmi(struct kvm_vcpu *vcpu)
7685 {
7686 unsigned limit = 2;
7687
7688 /*
7689  * x86 is limited to one NMI running, and one NMI pending after it.
7690  * If an NMI is already in progress, limit further NMIs to just one.
7691  * Otherwise, allow two (and we'll inject the first one immediately).
7692  */
7693 if (kvm_x86_ops->get_nmi_mask(vcpu) || vcpu->arch.nmi_injected)
7694 limit = 1;
7695
7696 vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
7697 vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);
7698 kvm_make_request(KVM_REQ_EVENT, vcpu);
7699 }
7700
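/* Pack segment attributes into the flag layout used by the SMRAM state-save area. */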
7701 static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
7702 {
7703 u32 flags = 0;
7704 flags |= seg->g << 23;
7705 flags |= seg->db << 22;
7706 flags |= seg->l << 21;
7707 flags |= seg->avl << 20;
7708 flags |= seg->present << 15;
7709 flags |= seg->dpl << 13;
7710 flags |= seg->s << 12;
7711 flags |= seg->type << 8;
7712 return flags;
7713 }
7714
7715 static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
7716 {
7717 struct kvm_segment seg;
7718 int offset;
7719
7720 kvm_get_segment(vcpu, &seg, n);
7721 put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
7722
7723 if (n < 3)
7724 offset = 0x7f84 + n * 12;
7725 else
7726 offset = 0x7f2c + (n - 3) * 12;
7727
7728 put_smstate(u32, buf, offset + 8, seg.base);
7729 put_smstate(u32, buf, offset + 4, seg.limit);
7730 put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg));
7731 }
7732
7733 #ifdef CONFIG_X86_64
7734 static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
7735 {
7736 struct kvm_segment seg;
7737 int offset;
7738 u16 flags;
7739
7740 kvm_get_segment(vcpu, &seg, n);
7741 offset = 0x7e00 + n * 16;
7742
7743 flags = enter_smm_get_segment_flags(&seg) >> 8;
7744 put_smstate(u16, buf, offset, seg.selector);
7745 put_smstate(u16, buf, offset + 2, flags);
7746 put_smstate(u32, buf, offset + 4, seg.limit);
7747 put_smstate(u64, buf, offset + 8, seg.base);
7748 }
7749 #endif
7750
7751 static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
7752 {
7753 struct desc_ptr dt;
7754 struct kvm_segment seg;
7755 unsigned long val;
7756 int i;
7757
7758 put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
7759 put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
7760 put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
7761 put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
7762
7763 for (i = 0; i < 8; i++)
7764 put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read(vcpu, i));
7765
7766 kvm_get_dr(vcpu, 6, &val);
7767 put_smstate(u32, buf, 0x7fcc, (u32)val);
7768 kvm_get_dr(vcpu, 7, &val);
7769 put_smstate(u32, buf, 0x7fc8, (u32)val);
7770
7771 kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
7772 put_smstate(u32, buf, 0x7fc4, seg.selector);
7773 put_smstate(u32, buf, 0x7f64, seg.base);
7774 put_smstate(u32, buf, 0x7f60, seg.limit);
7775 put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
7776
7777 kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
7778 put_smstate(u32, buf, 0x7fc0, seg.selector);
7779 put_smstate(u32, buf, 0x7f80, seg.base);
7780 put_smstate(u32, buf, 0x7f7c, seg.limit);
7781 put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
7782
7783 kvm_x86_ops->get_gdt(vcpu, &dt);
7784 put_smstate(u32, buf, 0x7f74, dt.address);
7785 put_smstate(u32, buf, 0x7f70, dt.size);
7786
7787 kvm_x86_ops->get_idt(vcpu, &dt);
7788 put_smstate(u32, buf, 0x7f58, dt.address);
7789 put_smstate(u32, buf, 0x7f54, dt.size);
7790
7791 for (i = 0; i < 6; i++)
7792 enter_smm_save_seg_32(vcpu, buf, i);
7793
7794 put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
7795
7796
7797 put_smstate(u32, buf, 0x7efc, 0x00020000);
7798 put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
7799 }
7800
7801 #ifdef CONFIG_X86_64
7802 static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
7803 {
7804 struct desc_ptr dt;
7805 struct kvm_segment seg;
7806 unsigned long val;
7807 int i;
7808
7809 for (i = 0; i < 16; i++)
7810 put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read(vcpu, i));
7811
7812 put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
7813 put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
7814
7815 kvm_get_dr(vcpu, 6, &val);
7816 put_smstate(u64, buf, 0x7f68, val);
7817 kvm_get_dr(vcpu, 7, &val);
7818 put_smstate(u64, buf, 0x7f60, val);
7819
7820 put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
7821 put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
7822 put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
7823
7824 put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
7825
7826
7827 put_smstate(u32, buf, 0x7efc, 0x00020064);
7828
7829 put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer);
7830
7831 kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
7832 put_smstate(u16, buf, 0x7e90, seg.selector);
7833 put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
7834 put_smstate(u32, buf, 0x7e94, seg.limit);
7835 put_smstate(u64, buf, 0x7e98, seg.base);
7836
7837 kvm_x86_ops->get_idt(vcpu, &dt);
7838 put_smstate(u32, buf, 0x7e84, dt.size);
7839 put_smstate(u64, buf, 0x7e88, dt.address);
7840
7841 kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
7842 put_smstate(u16, buf, 0x7e70, seg.selector);
7843 put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
7844 put_smstate(u32, buf, 0x7e74, seg.limit);
7845 put_smstate(u64, buf, 0x7e78, seg.base);
7846
7847 kvm_x86_ops->get_gdt(vcpu, &dt);
7848 put_smstate(u32, buf, 0x7e64, dt.size);
7849 put_smstate(u64, buf, 0x7e68, dt.address);
7850
7851 for (i = 0; i < 6; i++)
7852 enter_smm_save_seg_64(vcpu, buf, i);
7853 }
7854 #endif
7855
7856 static void enter_smm(struct kvm_vcpu *vcpu)
7857 {
7858 struct kvm_segment cs, ds;
7859 struct desc_ptr dt;
7860 char buf[512];
7861 u32 cr0;
7862
7863 trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
7864 memset(buf, 0, 512);
7865 #ifdef CONFIG_X86_64
7866 if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
7867 enter_smm_save_state_64(vcpu, buf);
7868 else
7869 #endif
7870 enter_smm_save_state_32(vcpu, buf);
7871
7872 /*
7873  * Give pre_enter_smm() a chance to make ISA-specific changes to the
7874  * vCPU state (e.g. leave guest mode) after we've saved the state into
7875  * the SMM state-save area.
7876  */
7877 kvm_x86_ops->pre_enter_smm(vcpu, buf);
7878
7879 vcpu->arch.hflags |= HF_SMM_MASK;
7880 kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
7881
7882 if (kvm_x86_ops->get_nmi_mask(vcpu))
7883 vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
7884 else
7885 kvm_x86_ops->set_nmi_mask(vcpu, true);
7886
7887 kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
7888 kvm_rip_write(vcpu, 0x8000);
7889
7890 cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
7891 kvm_x86_ops->set_cr0(vcpu, cr0);
7892 vcpu->arch.cr0 = cr0;
7893
7894 kvm_x86_ops->set_cr4(vcpu, 0);
7895
7896 /* Undocumented: IDT limit is set to zero on entry to SMM.  */
7897 dt.address = dt.size = 0;
7898 kvm_x86_ops->set_idt(vcpu, &dt);
7899
7900 __kvm_set_dr(vcpu, 7, DR7_FIXED_1);
7901
7902 cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
7903 cs.base = vcpu->arch.smbase;
7904
7905 ds.selector = 0;
7906 ds.base = 0;
7907
7908 cs.limit = ds.limit = 0xffffffff;
7909 cs.type = ds.type = 0x3;
7910 cs.dpl = ds.dpl = 0;
7911 cs.db = ds.db = 0;
7912 cs.s = ds.s = 1;
7913 cs.l = ds.l = 0;
7914 cs.g = ds.g = 1;
7915 cs.avl = ds.avl = 0;
7916 cs.present = ds.present = 1;
7917 cs.unusable = ds.unusable = 0;
7918 cs.padding = ds.padding = 0;
7919
7920 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
7921 kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
7922 kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
7923 kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
7924 kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
7925 kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
7926
7927 #ifdef CONFIG_X86_64
7928 if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
7929 kvm_x86_ops->set_efer(vcpu, 0);
7930 #endif
7931
7932 kvm_update_cpuid(vcpu);
7933 kvm_mmu_reset_context(vcpu);
7934 }
7935
7936 static void process_smi(struct kvm_vcpu *vcpu)
7937 {
7938 vcpu->arch.smi_pending = true;
7939 kvm_make_request(KVM_REQ_EVENT, vcpu);
7940 }
7941
7942 void kvm_make_scan_ioapic_request(struct kvm *kvm)
7943 {
7944 kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
7945 }
7946
7947 static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
7948 {
7949 if (!kvm_apic_present(vcpu))
7950 return;
7951
7952 bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256);
7953
7954 if (irqchip_split(vcpu->kvm))
7955 kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
7956 else {
7957 if (vcpu->arch.apicv_active)
7958 kvm_x86_ops->sync_pir_to_irr(vcpu);
7959 if (ioapic_in_kernel(vcpu->kvm))
7960 kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
7961 }
7962
7963 if (is_guest_mode(vcpu))
7964 vcpu->arch.load_eoi_exitmap_pending = true;
7965 else
7966 kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
7967 }
7968
7969 static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
7970 {
7971 u64 eoi_exit_bitmap[4];
7972
7973 if (!kvm_apic_hw_enabled(vcpu->arch.apic))
7974 return;
7975
7976 bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors,
7977 vcpu_to_synic(vcpu)->vec_bitmap, 256);
7978 kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
7979 }
7980
7981 void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
7982 unsigned long start, unsigned long end)
7983 {
7984 unsigned long apic_address;
7985
7986 /*
7987  * The physical address of the APIC access page is stored in the VMCS.
7988  * Update it when it becomes invalid.
7989  */
7990 apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
7991 if (start <= apic_address && apic_address < end)
7992 kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
7993 }
7994
7995 void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
7996 {
7997 struct page *page = NULL;
7998
7999 if (!lapic_in_kernel(vcpu))
8000 return;
8001
8002 if (!kvm_x86_ops->set_apic_access_page_addr)
8003 return;
8004
8005 page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
8006 if (is_error_page(page))
8007 return;
8008 kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page));
8009
8010 /*
8011  * Do not pin the APIC access page in memory; the MMU notifier
8012  * will call us again if it is migrated or swapped out.
8013  */
8014 put_page(page);
8015 }
8016 EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
8017
8018 void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
8019 {
8020 smp_send_reschedule(vcpu->cpu);
8021 }
8022 EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
8023
8024 /*
8025  * Returns 1 to let vcpu_run() continue the guest execution loop without
8026  * exiting to the userspace.  Otherwise, the value will be returned to
8027  * userspace.
8028  */
8029 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
8030 {
8031 int r;
8032 bool req_int_win =
8033 dm_request_for_irq_injection(vcpu) &&
8034 kvm_cpu_accept_dm_intr(vcpu);
8035
8036 bool req_immediate_exit = false;
8037
8038 if (kvm_request_pending(vcpu)) {
8039 if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) {
8040 if (unlikely(!kvm_x86_ops->get_vmcs12_pages(vcpu))) {
8041 r = 0;
8042 goto out;
8043 }
8044 }
8045 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
8046 kvm_mmu_unload(vcpu);
8047 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
8048 __kvm_migrate_timers(vcpu);
8049 if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
8050 kvm_gen_update_masterclock(vcpu->kvm);
8051 if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
8052 kvm_gen_kvmclock_update(vcpu);
8053 if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
8054 r = kvm_guest_time_update(vcpu);
8055 if (unlikely(r))
8056 goto out;
8057 }
8058 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
8059 kvm_mmu_sync_roots(vcpu);
8060 if (kvm_check_request(KVM_REQ_LOAD_CR3, vcpu))
8061 kvm_mmu_load_cr3(vcpu);
8062 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
8063 kvm_vcpu_flush_tlb(vcpu, true);
8064 if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
8065 vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
8066 r = 0;
8067 goto out;
8068 }
8069 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
8070 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
8071 vcpu->mmio_needed = 0;
8072 r = 0;
8073 goto out;
8074 }
8075 if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
8076 /* Page is swapped out.  Do synthetic halt */
8077 vcpu->arch.apf.halted = true;
8078 r = 1;
8079 goto out;
8080 }
8081 if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
8082 record_steal_time(vcpu);
8083 if (kvm_check_request(KVM_REQ_SMI, vcpu))
8084 process_smi(vcpu);
8085 if (kvm_check_request(KVM_REQ_NMI, vcpu))
8086 process_nmi(vcpu);
8087 if (kvm_check_request(KVM_REQ_PMU, vcpu))
8088 kvm_pmu_handle_event(vcpu);
8089 if (kvm_check_request(KVM_REQ_PMI, vcpu))
8090 kvm_pmu_deliver_pmi(vcpu);
8091 if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
8092 BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
8093 if (test_bit(vcpu->arch.pending_ioapic_eoi,
8094 vcpu->arch.ioapic_handled_vectors)) {
8095 vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
8096 vcpu->run->eoi.vector =
8097 vcpu->arch.pending_ioapic_eoi;
8098 r = 0;
8099 goto out;
8100 }
8101 }
8102 if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
8103 vcpu_scan_ioapic(vcpu);
8104 if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu))
8105 vcpu_load_eoi_exitmap(vcpu);
8106 if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
8107 kvm_vcpu_reload_apic_access_page(vcpu);
8108 if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
8109 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
8110 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
8111 r = 0;
8112 goto out;
8113 }
8114 if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) {
8115 vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
8116 vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET;
8117 r = 0;
8118 goto out;
8119 }
8120 if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) {
8121 vcpu->run->exit_reason = KVM_EXIT_HYPERV;
8122 vcpu->run->hyperv = vcpu->arch.hyperv.exit;
8123 r = 0;
8124 goto out;
8125 }
8126
8127 /*
8128  * KVM_REQ_HV_STIMER has to be processed after
8129  * KVM_REQ_CLOCK_UPDATE, because Hyper-V SynIC timers
8130  * depend on the guest clock being up-to-date.
8131  */
8132 if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
8133 kvm_hv_process_stimers(vcpu);
8134 }
8135
8136 if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
8137 ++vcpu->stat.req_event;
8138 kvm_apic_accept_events(vcpu);
8139 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
8140 r = 1;
8141 goto out;
8142 }
8143
8144 if (inject_pending_event(vcpu) != 0)
8145 req_immediate_exit = true;
8146 else {
8147 /*
8148  * Enable SMI/NMI/IRQ window open exits if needed.
8149  *
8150  * SMIs have three cases:
8151  * 1) They can be nested, and then there is nothing to do here
8152  *    because RSM will cause a vmexit anyway.
8153  * 2) There is an ISA-specific reason why SMI cannot be injected,
8154  *    and the moment when this changes can be intercepted.
8155  * 3) Or the SMI can be pending because inject_pending_event has
8156  *    completed the injection of an IRQ or NMI from the previous
8157  *    vmexit, and then we request an immediate exit to inject the
8158  *    SMI.
8159  */
8160
8161 if (vcpu->arch.smi_pending && !is_smm(vcpu))
8162 if (!kvm_x86_ops->enable_smi_window(vcpu))
8163 req_immediate_exit = true;
8164 if (vcpu->arch.nmi_pending)
8165 kvm_x86_ops->enable_nmi_window(vcpu);
8166 if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
8167 kvm_x86_ops->enable_irq_window(vcpu);
8168 WARN_ON(vcpu->arch.exception.pending);
8169 }
8170
8171 if (kvm_lapic_enabled(vcpu)) {
8172 update_cr8_intercept(vcpu);
8173 kvm_lapic_sync_to_vapic(vcpu);
8174 }
8175 }
8176
8177 r = kvm_mmu_reload(vcpu);
8178 if (unlikely(r)) {
8179 goto cancel_injection;
8180 }
8181
8182 preempt_disable();
8183
8184 kvm_x86_ops->prepare_guest_switch(vcpu);
8185
8186
8187
8188
8189
8190
8191 local_irq_disable();
8192 vcpu->mode = IN_GUEST_MODE;
8193
8194 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
8195
8196 /*
8197  * 1) We should set ->mode before checking ->requests.  Please see
8198  * the comment in kvm_vcpu_exiting_guest_mode().
8199  *
8200  * 2) For APICv, we should set ->mode before checking PID.ON.  This
8201  * pairs with the memory barrier implicit in pi_test_and_set_on
8202  * (see vmx_deliver_posted_interrupt).
8203  *
8204  * 3) This also orders the write to mode from any reads to the page
8205  * tables done while the VCPU is running.  Please see the comment
8206  * in kvm_flush_remote_tlbs.
8207  */
8208 smp_mb__after_srcu_read_unlock();
8209
8210 /*
8211  * This handles the case where a posted interrupt was
8212  * notified with kvm_vcpu_kick.
8213  */
8214 if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
8215 kvm_x86_ops->sync_pir_to_irr(vcpu);
8216
8217 if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu)
8218 || need_resched() || signal_pending(current)) {
8219 vcpu->mode = OUTSIDE_GUEST_MODE;
8220 smp_wmb();
8221 local_irq_enable();
8222 preempt_enable();
8223 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
8224 r = 1;
8225 goto cancel_injection;
8226 }
8227
8228 if (req_immediate_exit) {
8229 kvm_make_request(KVM_REQ_EVENT, vcpu);
8230 kvm_x86_ops->request_immediate_exit(vcpu);
8231 }
8232
8233 trace_kvm_entry(vcpu->vcpu_id);
8234 guest_enter_irqoff();
8235
8236
8237 vcpu->arch.host_pkru = read_pkru();
8238
8239 fpregs_assert_state_consistent();
8240 if (test_thread_flag(TIF_NEED_FPU_LOAD))
8241 switch_fpu_return();
8242
8243 if (unlikely(vcpu->arch.switch_db_regs)) {
8244 set_debugreg(0, 7);
8245 set_debugreg(vcpu->arch.eff_db[0], 0);
8246 set_debugreg(vcpu->arch.eff_db[1], 1);
8247 set_debugreg(vcpu->arch.eff_db[2], 2);
8248 set_debugreg(vcpu->arch.eff_db[3], 3);
8249 set_debugreg(vcpu->arch.dr6, 6);
8250 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
8251 }
8252
8253 kvm_x86_ops->run(vcpu);
8254
8255 /*
8256  * Do this here before restoring debug registers on the host.  And
8257  * since we do this before handling the vmexit, a DR access vmexit
8258  * can (a) read the correct value of the debug registers, (b) set
8259  * KVM_DEBUGREG_WONT_EXIT again.
8260  */
8261 if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
8262 WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
8263 kvm_x86_ops->sync_dirty_debug_regs(vcpu);
8264 kvm_update_dr0123(vcpu);
8265 kvm_update_dr6(vcpu);
8266 kvm_update_dr7(vcpu);
8267 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
8268 }
8269
8270 /*
8271  * If the guest has used debug registers, at least dr7
8272  * will be disabled while returning to the host.
8273  * If we don't have active breakpoints in the host, we don't
8274  * care about the messed-up debug address registers.  But if
8275  * we have some of them active, restore the old state.
8276  */
8277 if (hw_breakpoint_active())
8278 hw_breakpoint_restore();
8279
8280 vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
8281
8282 vcpu->mode = OUTSIDE_GUEST_MODE;
8283 smp_wmb();
8284
8285 kvm_x86_ops->handle_exit_irqoff(vcpu);
8286
8287 /*
8288  * Consume any pending interrupts, including the possible source of
8289  * VM-Exit on SVM and any ticks that occur between VM-Exit and now.
8290  * An instruction is required after local_irq_enable() to fully unblock
8291  * interrupts on processors that implement an interrupt shadow; the
8292  * stat.exits increment will do nicely.
8293  */
8294 kvm_before_interrupt(vcpu);
8295 local_irq_enable();
8296 ++vcpu->stat.exits;
8297 local_irq_disable();
8298 kvm_after_interrupt(vcpu);
8299
8300 guest_exit_irqoff();
8301 if (lapic_in_kernel(vcpu)) {
8302 s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
8303 if (delta != S64_MIN) {
8304 trace_kvm_wait_lapic_expire(vcpu->vcpu_id, delta);
8305 vcpu->arch.apic->lapic_timer.advance_expire_delta = S64_MIN;
8306 }
8307 }
8308
8309 local_irq_enable();
8310 preempt_enable();
8311
8312 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
8313
8314 /*
8315  * Profile KVM exit RIPs:
8316  */
8317 if (unlikely(prof_on == KVM_PROFILING)) {
8318 unsigned long rip = kvm_rip_read(vcpu);
8319 profile_hit(KVM_PROFILING, (void *)rip);
8320 }
8321
8322 if (unlikely(vcpu->arch.tsc_always_catchup))
8323 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
8324
8325 if (vcpu->arch.apic_attention)
8326 kvm_lapic_sync_from_vapic(vcpu);
8327
8328 vcpu->arch.gpa_available = false;
8329 r = kvm_x86_ops->handle_exit(vcpu);
8330 return r;
8331
8332 cancel_injection:
8333 kvm_x86_ops->cancel_injection(vcpu);
8334 if (unlikely(vcpu->arch.apic_attention))
8335 kvm_lapic_sync_from_vapic(vcpu);
8336 out:
8337 return r;
8338 }
8339
8340 static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
8341 {
8342 if (!kvm_arch_vcpu_runnable(vcpu) &&
8343 (!kvm_x86_ops->pre_block || kvm_x86_ops->pre_block(vcpu) == 0)) {
8344 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
8345 kvm_vcpu_block(vcpu);
8346 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
8347
8348 if (kvm_x86_ops->post_block)
8349 kvm_x86_ops->post_block(vcpu);
8350
8351 if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
8352 return 1;
8353 }
8354
8355 kvm_apic_accept_events(vcpu);
8356 switch(vcpu->arch.mp_state) {
8357 case KVM_MP_STATE_HALTED:
8358 vcpu->arch.pv.pv_unhalted = false;
8359 vcpu->arch.mp_state =
8360 KVM_MP_STATE_RUNNABLE;
8361 /* fall through */
8362 case KVM_MP_STATE_RUNNABLE:
8363 vcpu->arch.apf.halted = false;
8364 break;
8365 case KVM_MP_STATE_INIT_RECEIVED:
8366 break;
8367 default:
8368 return -EINTR;
8369 break;
8370 }
8371 return 1;
8372 }
8373
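/* A vCPU is considered running when it is RUNNABLE and not halted waiting on an async page fault. */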
8374 static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
8375 {
8376 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
8377 kvm_x86_ops->check_nested_events(vcpu);
8378
8379 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
8380 !vcpu->arch.apf.halted);
8381 }
8382
8383 static int vcpu_run(struct kvm_vcpu *vcpu)
8384 {
8385 int r;
8386 struct kvm *kvm = vcpu->kvm;
8387
8388 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
8389 vcpu->arch.l1tf_flush_l1d = true;
8390
8391 for (;;) {
8392 if (kvm_vcpu_running(vcpu)) {
8393 r = vcpu_enter_guest(vcpu);
8394 } else {
8395 r = vcpu_block(kvm, vcpu);
8396 }
8397
8398 if (r <= 0)
8399 break;
8400
8401 kvm_clear_request(KVM_REQ_PENDING_TIMER, vcpu);
8402 if (kvm_cpu_has_pending_timer(vcpu))
8403 kvm_inject_pending_timer_irqs(vcpu);
8404
8405 if (dm_request_for_irq_injection(vcpu) &&
8406 kvm_vcpu_ready_for_interrupt_injection(vcpu)) {
8407 r = 0;
8408 vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
8409 ++vcpu->stat.request_irq_exits;
8410 break;
8411 }
8412
8413 kvm_check_async_pf_completion(vcpu);
8414
8415 if (signal_pending(current)) {
8416 r = -EINTR;
8417 vcpu->run->exit_reason = KVM_EXIT_INTR;
8418 ++vcpu->stat.signal_exits;
8419 break;
8420 }
8421 if (need_resched()) {
8422 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
8423 cond_resched();
8424 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
8425 }
8426 }
8427
8428 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
8429
8430 return r;
8431 }
8432
8433 static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
8434 {
8435 int r;
8436
8437 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
8438 r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
8439 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
8440 return r;
8441 }
8442
8443 static int complete_emulated_pio(struct kvm_vcpu *vcpu)
8444 {
8445 BUG_ON(!vcpu->arch.pio.count);
8446
8447 return complete_emulated_io(vcpu);
8448 }
8449
8450 /*
8451  * Implements the following, as a state machine:
8452  *
8453  * read:
8454  *   for each fragment
8455  *     for each mmio piece in the fragment
8456  *       write gpa, len
8457  *       exit
8458  *       copy data
8459  *   execute insn
8460  *
8461  * write:
8462  *   for each fragment
8463  *     for each mmio piece in the fragment
8464  *       write gpa, len
8465  *       copy data
8466  *       exit
8467  */
8468 static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
8469 {
8470 struct kvm_run *run = vcpu->run;
8471 struct kvm_mmio_fragment *frag;
8472 unsigned len;
8473
8474 BUG_ON(!vcpu->mmio_needed);
8475
8476
8477 frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
8478 len = min(8u, frag->len);
8479 if (!vcpu->mmio_is_write)
8480 memcpy(frag->data, run->mmio.data, len);
8481
8482 if (frag->len <= 8) {
8483
8484 frag++;
8485 vcpu->mmio_cur_fragment++;
8486 } else {
8487
8488 frag->data += len;
8489 frag->gpa += len;
8490 frag->len -= len;
8491 }
8492
8493 if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
8494 vcpu->mmio_needed = 0;
8495
8496
8497 if (vcpu->mmio_is_write)
8498 return 1;
8499 vcpu->mmio_read_completed = 1;
8500 return complete_emulated_io(vcpu);
8501 }
8502
8503 run->exit_reason = KVM_EXIT_MMIO;
8504 run->mmio.phys_addr = frag->gpa;
8505 if (vcpu->mmio_is_write)
8506 memcpy(run->mmio.data, frag->data, min(8u, frag->len));
8507 run->mmio.len = min(8u, frag->len);
8508 run->mmio.is_write = vcpu->mmio_is_write;
8509 vcpu->arch.complete_userspace_io = complete_emulated_mmio;
8510 return 0;
8511 }
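/*
 * Editor's illustration (not part of the original file): the userspace half
 * of the fragment protocol above.  Each KVM_EXIT_MMIO carries at most 8 bytes
 * (min(8u, frag->len)); the VMM services one piece per exit, and the next
 * KVM_RUN re-enters complete_emulated_mmio() to advance the cursor.  The
 * 4 KiB "mmio_regs" array and the 0xd0000000 base are hypothetical device
 * state; "run" is an assumed, already-mapped vcpu run area.
 */
#include <string.h>
#include <linux/kvm.h>

static unsigned char mmio_regs[4096];		/* fake device backing store */

static void service_mmio_exit(struct kvm_run *run)
{
	unsigned long long off = run->mmio.phys_addr - 0xd0000000ULL;

	if (run->exit_reason != KVM_EXIT_MMIO ||
	    off + run->mmio.len > sizeof(mmio_regs))
		return;

	if (run->mmio.is_write)
		memcpy(&mmio_regs[off], run->mmio.data, run->mmio.len);
	else
		memcpy(run->mmio.data, &mmio_regs[off], run->mmio.len);
}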
8512
8513 static void kvm_save_current_fpu(struct fpu *fpu)
8514 {
8515 /*
8516 * If the target FPU state is not resident in the CPU registers, just
8517 * memcpy() from current, else save CPU state directly to the target.
8518 */
8519 if (test_thread_flag(TIF_NEED_FPU_LOAD))
8520 memcpy(&fpu->state, &current->thread.fpu.state,
8521 fpu_kernel_xstate_size);
8522 else
8523 copy_fpregs_to_fpstate(fpu);
8524 }
8525
8526 /* Swap (qemu) user FPU context for the guest FPU context. */
8527 static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
8528 {
8529 fpregs_lock();
8530
8531 kvm_save_current_fpu(vcpu->arch.user_fpu);
8532
8533 /* PKRU is separately restored in kvm_x86_ops->run. */
8534 __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
8535 ~XFEATURE_MASK_PKRU);
8536
8537 fpregs_mark_activate();
8538 fpregs_unlock();
8539
8540 trace_kvm_fpu(1);
8541 }
8542
8543 /* When vcpu_run ends, restore user space FPU context. */
8544 static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
8545 {
8546 fpregs_lock();
8547
8548 kvm_save_current_fpu(vcpu->arch.guest_fpu);
8549
8550 copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state);
8551
8552 fpregs_mark_activate();
8553 fpregs_unlock();
8554
8555 ++vcpu->stat.fpu_reload;
8556 trace_kvm_fpu(0);
8557 }
8558
8559 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
8560 {
8561 int r;
8562
8563 vcpu_load(vcpu);
8564 kvm_sigset_activate(vcpu);
8565 kvm_load_guest_fpu(vcpu);
8566
8567 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
8568 if (kvm_run->immediate_exit) {
8569 r = -EINTR;
8570 goto out;
8571 }
8572 kvm_vcpu_block(vcpu);
8573 kvm_apic_accept_events(vcpu);
8574 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
8575 r = -EAGAIN;
8576 if (signal_pending(current)) {
8577 r = -EINTR;
8578 vcpu->run->exit_reason = KVM_EXIT_INTR;
8579 ++vcpu->stat.signal_exits;
8580 }
8581 goto out;
8582 }
8583
8584 if (vcpu->run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) {
8585 r = -EINVAL;
8586 goto out;
8587 }
8588
8589 if (vcpu->run->kvm_dirty_regs) {
8590 r = sync_regs(vcpu);
8591 if (r != 0)
8592 goto out;
8593 }
8594
8595 /* re-sync apic's tpr */
8596 if (!lapic_in_kernel(vcpu)) {
8597 if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
8598 r = -EINVAL;
8599 goto out;
8600 }
8601 }
8602
8603 if (unlikely(vcpu->arch.complete_userspace_io)) {
8604 int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
8605 vcpu->arch.complete_userspace_io = NULL;
8606 r = cui(vcpu);
8607 if (r <= 0)
8608 goto out;
8609 } else
8610 WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
8611
8612 if (kvm_run->immediate_exit)
8613 r = -EINTR;
8614 else
8615 r = vcpu_run(vcpu);
8616
8617 out:
8618 kvm_put_guest_fpu(vcpu);
8619 if (vcpu->run->kvm_valid_regs)
8620 store_regs(vcpu);
8621 post_kvm_run_save(vcpu);
8622 kvm_sigset_deactivate(vcpu);
8623
8624 vcpu_put(vcpu);
8625 return r;
8626 }
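/*
 * Editor's illustration (not part of the original file): a minimal userspace
 * loop that reaches kvm_arch_vcpu_ioctl_run() through the KVM_RUN ioctl.
 * Creating the VM, the vcpu and guest memory is assumed to have happened
 * already; vcpu_fd and mmap_size are hypothetical names for values obtained
 * from KVM_CREATE_VCPU and KVM_GET_VCPU_MMAP_SIZE.
 */
#include <stddef.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

static int run_vcpu(int vcpu_fd, size_t mmap_size)
{
	struct kvm_run *run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
				   MAP_SHARED, vcpu_fd, 0);

	if (run == MAP_FAILED)
		return -1;

	for (;;) {
		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
			return -1;	/* -EINTR from a signal lands here */

		switch (run->exit_reason) {
		case KVM_EXIT_HLT:
			return 0;
		case KVM_EXIT_IO:
		case KVM_EXIT_MMIO:
			/* device emulation would go here */
			break;
		default:
			fprintf(stderr, "unhandled exit %u\n", run->exit_reason);
			return -1;
		}
	}
}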
8627
8628 static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
8629 {
8630 if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
8631 /*
8632 * We are here if userspace calls get_regs() in the middle of
8633 * instruction emulation.  Register state needs to be copied
8634 * back from the emulation context to the vcpu.  Userspace
8635 * shouldn't normally do that, but some badly designed PV
8636 * devices (the vmware backdoor interface) need this to work.
8637 */
8638 emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt);
8639 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
8640 }
8641 regs->rax = kvm_rax_read(vcpu);
8642 regs->rbx = kvm_rbx_read(vcpu);
8643 regs->rcx = kvm_rcx_read(vcpu);
8644 regs->rdx = kvm_rdx_read(vcpu);
8645 regs->rsi = kvm_rsi_read(vcpu);
8646 regs->rdi = kvm_rdi_read(vcpu);
8647 regs->rsp = kvm_rsp_read(vcpu);
8648 regs->rbp = kvm_rbp_read(vcpu);
8649 #ifdef CONFIG_X86_64
8650 regs->r8 = kvm_r8_read(vcpu);
8651 regs->r9 = kvm_r9_read(vcpu);
8652 regs->r10 = kvm_r10_read(vcpu);
8653 regs->r11 = kvm_r11_read(vcpu);
8654 regs->r12 = kvm_r12_read(vcpu);
8655 regs->r13 = kvm_r13_read(vcpu);
8656 regs->r14 = kvm_r14_read(vcpu);
8657 regs->r15 = kvm_r15_read(vcpu);
8658 #endif
8659
8660 regs->rip = kvm_rip_read(vcpu);
8661 regs->rflags = kvm_get_rflags(vcpu);
8662 }
8663
8664 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
8665 {
8666 vcpu_load(vcpu);
8667 __get_regs(vcpu, regs);
8668 vcpu_put(vcpu);
8669 return 0;
8670 }
8671
8672 static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
8673 {
8674 vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
8675 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
8676
8677 kvm_rax_write(vcpu, regs->rax);
8678 kvm_rbx_write(vcpu, regs->rbx);
8679 kvm_rcx_write(vcpu, regs->rcx);
8680 kvm_rdx_write(vcpu, regs->rdx);
8681 kvm_rsi_write(vcpu, regs->rsi);
8682 kvm_rdi_write(vcpu, regs->rdi);
8683 kvm_rsp_write(vcpu, regs->rsp);
8684 kvm_rbp_write(vcpu, regs->rbp);
8685 #ifdef CONFIG_X86_64
8686 kvm_r8_write(vcpu, regs->r8);
8687 kvm_r9_write(vcpu, regs->r9);
8688 kvm_r10_write(vcpu, regs->r10);
8689 kvm_r11_write(vcpu, regs->r11);
8690 kvm_r12_write(vcpu, regs->r12);
8691 kvm_r13_write(vcpu, regs->r13);
8692 kvm_r14_write(vcpu, regs->r14);
8693 kvm_r15_write(vcpu, regs->r15);
8694 #endif
8695
8696 kvm_rip_write(vcpu, regs->rip);
8697 kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
8698
8699 vcpu->arch.exception.pending = false;
8700
8701 kvm_make_request(KVM_REQ_EVENT, vcpu);
8702 }
8703
8704 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
8705 {
8706 vcpu_load(vcpu);
8707 __set_regs(vcpu, regs);
8708 vcpu_put(vcpu);
8709 return 0;
8710 }
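/*
 * Editor's illustration (not part of the original file): __get_regs() and
 * __set_regs() back the KVM_GET_REGS/KVM_SET_REGS ioctls.  A sketch that
 * reads the GPRs, points RIP somewhere else and writes them back; vcpu_fd is
 * an assumed, already-created vcpu descriptor.
 */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int set_entry_point(int vcpu_fd, unsigned long long rip)
{
	struct kvm_regs regs;

	if (ioctl(vcpu_fd, KVM_GET_REGS, &regs) < 0)
		return -1;
	regs.rip = rip;
	regs.rflags = 0x2;	/* __set_regs() ORs in X86_EFLAGS_FIXED anyway */
	return ioctl(vcpu_fd, KVM_SET_REGS, &regs);
}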
8711
8712 void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
8713 {
8714 struct kvm_segment cs;
8715
8716 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
8717 *db = cs.db;
8718 *l = cs.l;
8719 }
8720 EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
8721
8722 static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
8723 {
8724 struct desc_ptr dt;
8725
8726 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
8727 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
8728 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
8729 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
8730 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
8731 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
8732
8733 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
8734 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
8735
8736 kvm_x86_ops->get_idt(vcpu, &dt);
8737 sregs->idt.limit = dt.size;
8738 sregs->idt.base = dt.address;
8739 kvm_x86_ops->get_gdt(vcpu, &dt);
8740 sregs->gdt.limit = dt.size;
8741 sregs->gdt.base = dt.address;
8742
8743 sregs->cr0 = kvm_read_cr0(vcpu);
8744 sregs->cr2 = vcpu->arch.cr2;
8745 sregs->cr3 = kvm_read_cr3(vcpu);
8746 sregs->cr4 = kvm_read_cr4(vcpu);
8747 sregs->cr8 = kvm_get_cr8(vcpu);
8748 sregs->efer = vcpu->arch.efer;
8749 sregs->apic_base = kvm_get_apic_base(vcpu);
8750
8751 memset(sregs->interrupt_bitmap, 0, sizeof(sregs->interrupt_bitmap));
8752
8753 if (vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft)
8754 set_bit(vcpu->arch.interrupt.nr,
8755 (unsigned long *)sregs->interrupt_bitmap);
8756 }
8757
8758 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
8759 struct kvm_sregs *sregs)
8760 {
8761 vcpu_load(vcpu);
8762 __get_sregs(vcpu, sregs);
8763 vcpu_put(vcpu);
8764 return 0;
8765 }
8766
8767 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
8768 struct kvm_mp_state *mp_state)
8769 {
8770 vcpu_load(vcpu);
8771 if (kvm_mpx_supported())
8772 kvm_load_guest_fpu(vcpu);
8773
8774 kvm_apic_accept_events(vcpu);
8775 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
8776 vcpu->arch.pv.pv_unhalted)
8777 mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
8778 else
8779 mp_state->mp_state = vcpu->arch.mp_state;
8780
8781 if (kvm_mpx_supported())
8782 kvm_put_guest_fpu(vcpu);
8783 vcpu_put(vcpu);
8784 return 0;
8785 }
8786
8787 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
8788 struct kvm_mp_state *mp_state)
8789 {
8790 int ret = -EINVAL;
8791
8792 vcpu_load(vcpu);
8793
8794 if (!lapic_in_kernel(vcpu) &&
8795 mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
8796 goto out;
8797
8798
8799 if ((is_smm(vcpu) || vcpu->arch.smi_pending) &&
8800 (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
8801 mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
8802 goto out;
8803
8804 if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
8805 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
8806 set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
8807 } else
8808 vcpu->arch.mp_state = mp_state->mp_state;
8809 kvm_make_request(KVM_REQ_EVENT, vcpu);
8810
8811 ret = 0;
8812 out:
8813 vcpu_put(vcpu);
8814 return ret;
8815 }
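/*
 * Editor's illustration (not part of the original file): the mp_state pair
 * above backs KVM_GET_MP_STATE/KVM_SET_MP_STATE, which migration code uses to
 * carry HALTED/RUNNABLE/INIT/SIPI state between hosts.  vcpu_fd is an
 * assumed vcpu descriptor.
 */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int kick_out_of_halt(int vcpu_fd)
{
	struct kvm_mp_state mp;

	if (ioctl(vcpu_fd, KVM_GET_MP_STATE, &mp) < 0)
		return -1;
	if (mp.mp_state != KVM_MP_STATE_HALTED)
		return 0;
	mp.mp_state = KVM_MP_STATE_RUNNABLE;
	return ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp);
}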
8816
8817 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
8818 int reason, bool has_error_code, u32 error_code)
8819 {
8820 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
8821 int ret;
8822
8823 init_emulate_ctxt(vcpu);
8824
8825 ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
8826 has_error_code, error_code);
8827 if (ret) {
8828 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
8829 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
8830 vcpu->run->internal.ndata = 0;
8831 return 0;
8832 }
8833
8834 kvm_rip_write(vcpu, ctxt->eip);
8835 kvm_set_rflags(vcpu, ctxt->eflags);
8836 kvm_make_request(KVM_REQ_EVENT, vcpu);
8837 return 1;
8838 }
8839 EXPORT_SYMBOL_GPL(kvm_task_switch);
8840
8841 static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
8842 {
8843 if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
8844 /*
8845 * When EFER.LME and CR0.PG are set, the processor is in
8846 * 64-bit mode (though maybe in a 32-bit code segment).
8847 * CR4.PAE and EFER.LMA must be set.
8848 */
8849 if (!(sregs->cr4 & X86_CR4_PAE)
8850 || !(sregs->efer & EFER_LMA))
8851 return -EINVAL;
8852 } else {
8853 /*
8854 * Not in 64-bit mode: EFER.LMA is clear and the code
8855 * segment cannot be 64-bit.
8856 */
8857 if (sregs->efer & EFER_LMA || sregs->cs.l)
8858 return -EINVAL;
8859 }
8860
8861 return kvm_valid_cr4(vcpu, sregs->cr4);
8862 }
8863
8864 static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
8865 {
8866 struct msr_data apic_base_msr;
8867 int mmu_reset_needed = 0;
8868 int cpuid_update_needed = 0;
8869 int pending_vec, max_bits, idx;
8870 struct desc_ptr dt;
8871 int ret = -EINVAL;
8872
8873 if (kvm_valid_sregs(vcpu, sregs))
8874 goto out;
8875
8876 apic_base_msr.data = sregs->apic_base;
8877 apic_base_msr.host_initiated = true;
8878 if (kvm_set_apic_base(vcpu, &apic_base_msr))
8879 goto out;
8880
8881 dt.size = sregs->idt.limit;
8882 dt.address = sregs->idt.base;
8883 kvm_x86_ops->set_idt(vcpu, &dt);
8884 dt.size = sregs->gdt.limit;
8885 dt.address = sregs->gdt.base;
8886 kvm_x86_ops->set_gdt(vcpu, &dt);
8887
8888 vcpu->arch.cr2 = sregs->cr2;
8889 mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
8890 vcpu->arch.cr3 = sregs->cr3;
8891 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
8892
8893 kvm_set_cr8(vcpu, sregs->cr8);
8894
8895 mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
8896 kvm_x86_ops->set_efer(vcpu, sregs->efer);
8897
8898 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
8899 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
8900 vcpu->arch.cr0 = sregs->cr0;
8901
8902 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
8903 cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) &
8904 (X86_CR4_OSXSAVE | X86_CR4_PKE));
8905 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
8906 if (cpuid_update_needed)
8907 kvm_update_cpuid(vcpu);
8908
8909 idx = srcu_read_lock(&vcpu->kvm->srcu);
8910 if (is_pae_paging(vcpu)) {
8911 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
8912 mmu_reset_needed = 1;
8913 }
8914 srcu_read_unlock(&vcpu->kvm->srcu, idx);
8915
8916 if (mmu_reset_needed)
8917 kvm_mmu_reset_context(vcpu);
8918
8919 max_bits = KVM_NR_INTERRUPTS;
8920 pending_vec = find_first_bit(
8921 (const unsigned long *)sregs->interrupt_bitmap, max_bits);
8922 if (pending_vec < max_bits) {
8923 kvm_queue_interrupt(vcpu, pending_vec, false);
8924 pr_debug("Set back pending irq %d\n", pending_vec);
8925 }
8926
8927 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
8928 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
8929 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
8930 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
8931 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
8932 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
8933
8934 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
8935 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
8936
8937 update_cr8_intercept(vcpu);
8938
8939 /* Older userspace won't unhalt the vcpu on reset. */
8940 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
8941 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
8942 !is_protmode(vcpu))
8943 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
8944
8945 kvm_make_request(KVM_REQ_EVENT, vcpu);
8946
8947 ret = 0;
8948 out:
8949 return ret;
8950 }
8951
8952 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
8953 struct kvm_sregs *sregs)
8954 {
8955 int ret;
8956
8957 vcpu_load(vcpu);
8958 ret = __set_sregs(vcpu, sregs);
8959 vcpu_put(vcpu);
8960 return ret;
8961 }
8962
8963 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
8964 struct kvm_guest_debug *dbg)
8965 {
8966 unsigned long rflags;
8967 int i, r;
8968
8969 vcpu_load(vcpu);
8970
8971 if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
8972 r = -EBUSY;
8973 if (vcpu->arch.exception.pending)
8974 goto out;
8975 if (dbg->control & KVM_GUESTDBG_INJECT_DB)
8976 kvm_queue_exception(vcpu, DB_VECTOR);
8977 else
8978 kvm_queue_exception(vcpu, BP_VECTOR);
8979 }
8980
8981 /*
8982 * Read rflags as long as potentially injected trace flags are still
8983 * filtered out.
8984 */
8985 rflags = kvm_get_rflags(vcpu);
8986
8987 vcpu->guest_debug = dbg->control;
8988 if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
8989 vcpu->guest_debug = 0;
8990
8991 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
8992 for (i = 0; i < KVM_NR_DB_REGS; ++i)
8993 vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
8994 vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7];
8995 } else {
8996 for (i = 0; i < KVM_NR_DB_REGS; i++)
8997 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
8998 }
8999 kvm_update_dr7(vcpu);
9000
9001 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
9002 vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
9003 get_segment_base(vcpu, VCPU_SREG_CS);
9004
9005 /*
9006 * Trigger an rflags update that will inject or remove the trace
9007 * flags.
9008 */
9009 kvm_set_rflags(vcpu, rflags);
9010
9011 kvm_x86_ops->update_bp_intercept(vcpu);
9012
9013 r = 0;
9014
9015 out:
9016 vcpu_put(vcpu);
9017 return r;
9018 }
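/*
 * Editor's illustration (not part of the original file): single-stepping is
 * enabled from userspace with KVM_SET_GUEST_DEBUG, which lands in the handler
 * above, records singlestep_rip and raises EFLAGS.TF on the next entry.
 * vcpu_fd is an assumed vcpu descriptor.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int enable_single_step(int vcpu_fd)
{
	struct kvm_guest_debug dbg;

	memset(&dbg, 0, sizeof(dbg));
	dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
	return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}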
9019
9020
9021
9022
9023 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
9024 struct kvm_translation *tr)
9025 {
9026 unsigned long vaddr = tr->linear_address;
9027 gpa_t gpa;
9028 int idx;
9029
9030 vcpu_load(vcpu);
9031
9032 idx = srcu_read_lock(&vcpu->kvm->srcu);
9033 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
9034 srcu_read_unlock(&vcpu->kvm->srcu, idx);
9035 tr->physical_address = gpa;
9036 tr->valid = gpa != UNMAPPED_GVA;
9037 tr->writeable = 1;
9038 tr->usermode = 0;
9039
9040 vcpu_put(vcpu);
9041 return 0;
9042 }
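/*
 * Editor's illustration (not part of the original file): KVM_TRANSLATE drives
 * the handler above and is handy for inspecting guest page tables from the
 * VMM.  vcpu_fd is an assumed vcpu descriptor.
 */
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static void dump_translation(int vcpu_fd, unsigned long long gva)
{
	struct kvm_translation tr = { .linear_address = gva };

	if (ioctl(vcpu_fd, KVM_TRANSLATE, &tr) == 0 && tr.valid)
		printf("gva 0x%llx -> gpa 0x%llx\n", gva,
		       (unsigned long long)tr.physical_address);
	else
		printf("gva 0x%llx is unmapped\n", gva);
}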
9043
9044 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
9045 {
9046 struct fxregs_state *fxsave;
9047
9048 vcpu_load(vcpu);
9049
9050 fxsave = &vcpu->arch.guest_fpu->state.fxsave;
9051 memcpy(fpu->fpr, fxsave->st_space, 128);
9052 fpu->fcw = fxsave->cwd;
9053 fpu->fsw = fxsave->swd;
9054 fpu->ftwx = fxsave->twd;
9055 fpu->last_opcode = fxsave->fop;
9056 fpu->last_ip = fxsave->rip;
9057 fpu->last_dp = fxsave->rdp;
9058 memcpy(fpu->xmm, fxsave->xmm_space, sizeof(fxsave->xmm_space));
9059
9060 vcpu_put(vcpu);
9061 return 0;
9062 }
9063
9064 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
9065 {
9066 struct fxregs_state *fxsave;
9067
9068 vcpu_load(vcpu);
9069
9070 fxsave = &vcpu->arch.guest_fpu->state.fxsave;
9071
9072 memcpy(fxsave->st_space, fpu->fpr, 128);
9073 fxsave->cwd = fpu->fcw;
9074 fxsave->swd = fpu->fsw;
9075 fxsave->twd = fpu->ftwx;
9076 fxsave->fop = fpu->last_opcode;
9077 fxsave->rip = fpu->last_ip;
9078 fxsave->rdp = fpu->last_dp;
9079 memcpy(fxsave->xmm_space, fpu->xmm, sizeof(fxsave->xmm_space));
9080
9081 vcpu_put(vcpu);
9082 return 0;
9083 }
9084
9085 static void store_regs(struct kvm_vcpu *vcpu)
9086 {
9087 BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_SIZE_BYTES);
9088
9089 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_REGS)
9090 __get_regs(vcpu, &vcpu->run->s.regs.regs);
9091
9092 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_SREGS)
9093 __get_sregs(vcpu, &vcpu->run->s.regs.sregs);
9094
9095 if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_EVENTS)
9096 kvm_vcpu_ioctl_x86_get_vcpu_events(
9097 vcpu, &vcpu->run->s.regs.events);
9098 }
9099
9100 static int sync_regs(struct kvm_vcpu *vcpu)
9101 {
9102 if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)
9103 return -EINVAL;
9104
9105 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) {
9106 __set_regs(vcpu, &vcpu->run->s.regs.regs);
9107 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
9108 }
9109 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
9110 if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs))
9111 return -EINVAL;
9112 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS;
9113 }
9114 if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) {
9115 if (kvm_vcpu_ioctl_x86_set_vcpu_events(
9116 vcpu, &vcpu->run->s.regs.events))
9117 return -EINVAL;
9118 vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS;
9119 }
9120
9121 return 0;
9122 }
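/*
 * Editor's illustration (not part of the original file): with the capability
 * advertised by KVM_CAP_SYNC_REGS, store_regs()/sync_regs() above let the VMM
 * read and update register state through the shared kvm_run page instead of
 * issuing extra KVM_GET_REGS/KVM_SET_REGS ioctls.  "run" is an assumed,
 * already-mapped vcpu run area.
 */
#include <linux/kvm.h>

static void bump_rax_via_sync_regs(struct kvm_run *run)
{
	/* ask the kernel to mirror the GPRs into run->s.regs on every exit */
	run->kvm_valid_regs = KVM_SYNC_X86_REGS;

	/* after an exit: patch RAX and push it back on the next KVM_RUN */
	run->s.regs.regs.rax += 1;
	run->kvm_dirty_regs |= KVM_SYNC_X86_REGS;
}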
9123
9124 static void fx_init(struct kvm_vcpu *vcpu)
9125 {
9126 fpstate_init(&vcpu->arch.guest_fpu->state);
9127 if (boot_cpu_has(X86_FEATURE_XSAVES))
9128 vcpu->arch.guest_fpu->state.xsave.header.xcomp_bv =
9129 host_xcr0 | XSTATE_COMPACTION_ENABLED;
9130
9131
9132
9133
9134 vcpu->arch.xcr0 = XFEATURE_MASK_FP;
9135
9136 vcpu->arch.cr0 |= X86_CR0_ET;
9137 }
9138
9139 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
9140 {
9141 void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask;
9142 struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache;
9143
9144 kvm_release_pfn(cache->pfn, cache->dirty, cache);
9145
9146 kvmclock_reset(vcpu);
9147
9148 kvm_x86_ops->vcpu_free(vcpu);
9149 free_cpumask_var(wbinvd_dirty_mask);
9150 }
9151
9152 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
9153 unsigned int id)
9154 {
9155 struct kvm_vcpu *vcpu;
9156
9157 if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
9158 printk_once(KERN_WARNING
9159 "kvm: SMP vm created on host with unstable TSC; "
9160 "guest TSC will not be reliable\n");
9161
9162 vcpu = kvm_x86_ops->vcpu_create(kvm, id);
9163
9164 return vcpu;
9165 }
9166
9167 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
9168 {
9169 vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
9170 vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
9171 kvm_vcpu_mtrr_init(vcpu);
9172 vcpu_load(vcpu);
9173 kvm_vcpu_reset(vcpu, false);
9174 kvm_init_mmu(vcpu, false);
9175 vcpu_put(vcpu);
9176 return 0;
9177 }
9178
9179 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
9180 {
9181 struct msr_data msr;
9182 struct kvm *kvm = vcpu->kvm;
9183
9184 kvm_hv_vcpu_postcreate(vcpu);
9185
9186 if (mutex_lock_killable(&vcpu->mutex))
9187 return;
9188 vcpu_load(vcpu);
9189 msr.data = 0x0;
9190 msr.index = MSR_IA32_TSC;
9191 msr.host_initiated = true;
9192 kvm_write_tsc(vcpu, &msr);
9193 vcpu_put(vcpu);
9194
9195
9196 vcpu->arch.msr_kvm_poll_control = 1;
9197
9198 mutex_unlock(&vcpu->mutex);
9199
9200 if (!kvmclock_periodic_sync)
9201 return;
9202
9203 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
9204 KVMCLOCK_SYNC_PERIOD);
9205 }
9206
9207 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
9208 {
9209 kvm_arch_vcpu_free(vcpu);
9210 }
9211
9212 void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
9213 {
9214 kvm_lapic_reset(vcpu, init_event);
9215
9216 vcpu->arch.hflags = 0;
9217
9218 vcpu->arch.smi_pending = 0;
9219 vcpu->arch.smi_count = 0;
9220 atomic_set(&vcpu->arch.nmi_queued, 0);
9221 vcpu->arch.nmi_pending = 0;
9222 vcpu->arch.nmi_injected = false;
9223 kvm_clear_interrupt_queue(vcpu);
9224 kvm_clear_exception_queue(vcpu);
9225 vcpu->arch.exception.pending = false;
9226
9227 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
9228 kvm_update_dr0123(vcpu);
9229 vcpu->arch.dr6 = DR6_INIT;
9230 kvm_update_dr6(vcpu);
9231 vcpu->arch.dr7 = DR7_FIXED_1;
9232 kvm_update_dr7(vcpu);
9233
9234 vcpu->arch.cr2 = 0;
9235
9236 kvm_make_request(KVM_REQ_EVENT, vcpu);
9237 vcpu->arch.apf.msr_val = 0;
9238 vcpu->arch.st.msr_val = 0;
9239
9240 kvmclock_reset(vcpu);
9241
9242 kvm_clear_async_pf_completion_queue(vcpu);
9243 kvm_async_pf_hash_reset(vcpu);
9244 vcpu->arch.apf.halted = false;
9245
9246 if (kvm_mpx_supported()) {
9247 void *mpx_state_buffer;
9248
9249
9250
9251
9252
9253 if (init_event)
9254 kvm_put_guest_fpu(vcpu);
9255 mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
9256 XFEATURE_BNDREGS);
9257 if (mpx_state_buffer)
9258 memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state));
9259 mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
9260 XFEATURE_BNDCSR);
9261 if (mpx_state_buffer)
9262 memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
9263 if (init_event)
9264 kvm_load_guest_fpu(vcpu);
9265 }
9266
9267 if (!init_event) {
9268 kvm_pmu_reset(vcpu);
9269 vcpu->arch.smbase = 0x30000;
9270
9271 vcpu->arch.msr_misc_features_enables = 0;
9272
9273 vcpu->arch.xcr0 = XFEATURE_MASK_FP;
9274 }
9275
9276 memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
9277 vcpu->arch.regs_avail = ~0;
9278 vcpu->arch.regs_dirty = ~0;
9279
9280 vcpu->arch.ia32_xss = 0;
9281
9282 kvm_x86_ops->vcpu_reset(vcpu, init_event);
9283 }
9284
9285 void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
9286 {
9287 struct kvm_segment cs;
9288
9289 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
9290 cs.selector = vector << 8;
9291 cs.base = vector << 12;
9292 kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
9293 kvm_rip_write(vcpu, 0);
9294 }
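/*
 * Editor's note (not part of the original file): a worked example of the
 * SIPI mapping above.  For a startup IPI with vector 0x9A:
 *
 *     cs.selector = 0x9A << 8  = 0x9A00
 *     cs.base     = 0x9A << 12 = 0x9A000
 *     rip         = 0
 *
 * so the AP starts executing in real mode at physical address
 * cs.base + rip = 0x9A000, i.e. at page (vector * 4 KiB), which is where
 * firmware typically places its AP startup trampoline.
 */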
9295
9296 int kvm_arch_hardware_enable(void)
9297 {
9298 struct kvm *kvm;
9299 struct kvm_vcpu *vcpu;
9300 int i;
9301 int ret;
9302 u64 local_tsc;
9303 u64 max_tsc = 0;
9304 bool stable, backwards_tsc = false;
9305
9306 kvm_shared_msr_cpu_online();
9307 ret = kvm_x86_ops->hardware_enable();
9308 if (ret != 0)
9309 return ret;
9310
9311 local_tsc = rdtsc();
9312 stable = !kvm_check_tsc_unstable();
9313 list_for_each_entry(kvm, &vm_list, vm_list) {
9314 kvm_for_each_vcpu(i, vcpu, kvm) {
9315 if (!stable && vcpu->cpu == smp_processor_id())
9316 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
9317 if (stable && vcpu->arch.last_host_tsc > local_tsc) {
9318 backwards_tsc = true;
9319 if (vcpu->arch.last_host_tsc > max_tsc)
9320 max_tsc = vcpu->arch.last_host_tsc;
9321 }
9322 }
9323 }
9324
9325
9326
9327
9328
9329
9330
9331
9332
9333
9334
9335
9336
9337
9338
9339
9340
9341
9342
9343
9344
9345
9346
9347
9348
9349
9350
9351
9352
9353
9354
9355
9356
9357
9358
9359
9360
9361
9362
9363 if (backwards_tsc) {
9364 u64 delta_cyc = max_tsc - local_tsc;
9365 list_for_each_entry(kvm, &vm_list, vm_list) {
9366 kvm->arch.backwards_tsc_observed = true;
9367 kvm_for_each_vcpu(i, vcpu, kvm) {
9368 vcpu->arch.tsc_offset_adjustment += delta_cyc;
9369 vcpu->arch.last_host_tsc = local_tsc;
9370 kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
9371 }
9372
9373
9374
9375
9376
9377
9378
9379 kvm->arch.last_tsc_nsec = 0;
9380 kvm->arch.last_tsc_write = 0;
9381 }
9382
9383 }
9384 return 0;
9385 }
9386
9387 void kvm_arch_hardware_disable(void)
9388 {
9389 kvm_x86_ops->hardware_disable();
9390 drop_user_return_notifiers();
9391 }
9392
9393 int kvm_arch_hardware_setup(void)
9394 {
9395 int r;
9396
9397 r = kvm_x86_ops->hardware_setup();
9398 if (r != 0)
9399 return r;
9400
9401 cr4_reserved_bits = kvm_host_cr4_reserved_bits(&boot_cpu_data);
9402
9403 if (kvm_has_tsc_control) {
9404 /*
9405 * Make sure the user can only configure tsc_khz values that
9406 * fit into a signed integer.
9407 * A min value is not calculated because it will always
9408 * be 1 on all machines.
9409 */
9410 u64 max = min(0x7fffffffULL,
9411 __scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz));
9412 kvm_max_guest_tsc_khz = max;
9413
9414 kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits;
9415 }
9416
9417 kvm_init_msr_list();
9418 return 0;
9419 }
9420
9421 void kvm_arch_hardware_unsetup(void)
9422 {
9423 kvm_x86_ops->hardware_unsetup();
9424 }
9425
9426 int kvm_arch_check_processor_compat(void)
9427 {
9428 return kvm_x86_ops->check_processor_compatibility();
9429 }
9430
9431 bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
9432 {
9433 return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
9434 }
9435 EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp);
9436
9437 bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
9438 {
9439 return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
9440 }
9441
9442 struct static_key kvm_no_apic_vcpu __read_mostly;
9443 EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu);
9444
9445 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
9446 {
9447 struct page *page;
9448 int r;
9449
9450 vcpu->arch.emulate_ctxt.ops = &emulate_ops;
9451 if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
9452 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
9453 else
9454 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
9455
9456 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
9457 if (!page) {
9458 r = -ENOMEM;
9459 goto fail;
9460 }
9461 vcpu->arch.pio_data = page_address(page);
9462
9463 kvm_set_tsc_khz(vcpu, max_tsc_khz);
9464
9465 r = kvm_mmu_create(vcpu);
9466 if (r < 0)
9467 goto fail_free_pio_data;
9468
9469 if (irqchip_in_kernel(vcpu->kvm)) {
9470 vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
9471 r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
9472 if (r < 0)
9473 goto fail_mmu_destroy;
9474 } else
9475 static_key_slow_inc(&kvm_no_apic_vcpu);
9476
9477 vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
9478 GFP_KERNEL_ACCOUNT);
9479 if (!vcpu->arch.mce_banks) {
9480 r = -ENOMEM;
9481 goto fail_free_lapic;
9482 }
9483 vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
9484
9485 if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask,
9486 GFP_KERNEL_ACCOUNT)) {
9487 r = -ENOMEM;
9488 goto fail_free_mce_banks;
9489 }
9490
9491 fx_init(vcpu);
9492
9493 vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
9494
9495 vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
9496
9497 vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
9498
9499 kvm_async_pf_hash_reset(vcpu);
9500 kvm_pmu_init(vcpu);
9501
9502 vcpu->arch.pending_external_vector = -1;
9503 vcpu->arch.preempted_in_kernel = false;
9504
9505 kvm_hv_vcpu_init(vcpu);
9506
9507 return 0;
9508
9509 fail_free_mce_banks:
9510 kfree(vcpu->arch.mce_banks);
9511 fail_free_lapic:
9512 kvm_free_lapic(vcpu);
9513 fail_mmu_destroy:
9514 kvm_mmu_destroy(vcpu);
9515 fail_free_pio_data:
9516 free_page((unsigned long)vcpu->arch.pio_data);
9517 fail:
9518 return r;
9519 }
9520
9521 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
9522 {
9523 int idx;
9524
9525 kvm_hv_vcpu_uninit(vcpu);
9526 kvm_pmu_destroy(vcpu);
9527 kfree(vcpu->arch.mce_banks);
9528 kvm_free_lapic(vcpu);
9529 idx = srcu_read_lock(&vcpu->kvm->srcu);
9530 kvm_mmu_destroy(vcpu);
9531 srcu_read_unlock(&vcpu->kvm->srcu, idx);
9532 free_page((unsigned long)vcpu->arch.pio_data);
9533 if (!lapic_in_kernel(vcpu))
9534 static_key_slow_dec(&kvm_no_apic_vcpu);
9535 }
9536
9537 void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
9538 {
9539 vcpu->arch.l1tf_flush_l1d = true;
9540 kvm_x86_ops->sched_in(vcpu, cpu);
9541 }
9542
9543 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
9544 {
9545 if (type)
9546 return -EINVAL;
9547
9548 INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
9549 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
9550 INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
9551 INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages);
9552 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
9553 atomic_set(&kvm->arch.noncoherent_dma_count, 0);
9554
9555
9556 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
9557
9558 set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
9559 &kvm->arch.irq_sources_bitmap);
9560
9561 raw_spin_lock_init(&kvm->arch.tsc_write_lock);
9562 mutex_init(&kvm->arch.apic_map_lock);
9563 spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
9564
9565 kvm->arch.kvmclock_offset = -ktime_get_boottime_ns();
9566 pvclock_update_vm_gtod_copy(kvm);
9567
9568 kvm->arch.guest_can_read_msr_platform_info = true;
9569
9570 INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
9571 INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
9572
9573 kvm_hv_init_vm(kvm);
9574 kvm_page_track_init(kvm);
9575 kvm_mmu_init_vm(kvm);
9576
9577 return kvm_x86_ops->vm_init(kvm);
9578 }
9579
9580 int kvm_arch_post_init_vm(struct kvm *kvm)
9581 {
9582 return kvm_mmu_post_init_vm(kvm);
9583 }
9584
9585 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
9586 {
9587 vcpu_load(vcpu);
9588 kvm_mmu_unload(vcpu);
9589 vcpu_put(vcpu);
9590 }
9591
9592 static void kvm_free_vcpus(struct kvm *kvm)
9593 {
9594 unsigned int i;
9595 struct kvm_vcpu *vcpu;
9596
9597
9598
9599
9600 kvm_for_each_vcpu(i, vcpu, kvm) {
9601 kvm_clear_async_pf_completion_queue(vcpu);
9602 kvm_unload_vcpu_mmu(vcpu);
9603 }
9604 kvm_for_each_vcpu(i, vcpu, kvm)
9605 kvm_arch_vcpu_free(vcpu);
9606
9607 mutex_lock(&kvm->lock);
9608 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
9609 kvm->vcpus[i] = NULL;
9610
9611 atomic_set(&kvm->online_vcpus, 0);
9612 mutex_unlock(&kvm->lock);
9613 }
9614
9615 void kvm_arch_sync_events(struct kvm *kvm)
9616 {
9617 cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
9618 cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
9619 kvm_free_pit(kvm);
9620 }
9621
9622 int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
9623 {
9624 int i, r;
9625 unsigned long hva;
9626 struct kvm_memslots *slots = kvm_memslots(kvm);
9627 struct kvm_memory_slot *slot, old;
9628
9629
9630 if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
9631 return -EINVAL;
9632
9633 slot = id_to_memslot(slots, id);
9634 if (size) {
9635 if (slot->npages)
9636 return -EEXIST;
9637
9638
9639
9640
9641
9642 hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
9643 MAP_SHARED | MAP_ANONYMOUS, 0);
9644 if (IS_ERR((void *)hva))
9645 return PTR_ERR((void *)hva);
9646 } else {
9647 if (!slot->npages)
9648 return 0;
9649
9650 hva = 0;
9651 }
9652
9653 old = *slot;
9654 for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
9655 struct kvm_userspace_memory_region m;
9656
9657 m.slot = id | (i << 16);
9658 m.flags = 0;
9659 m.guest_phys_addr = gpa;
9660 m.userspace_addr = hva;
9661 m.memory_size = size;
9662 r = __kvm_set_memory_region(kvm, &m);
9663 if (r < 0)
9664 return r;
9665 }
9666
9667 if (!size)
9668 vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
9669
9670 return 0;
9671 }
9672 EXPORT_SYMBOL_GPL(__x86_set_memory_region);
9673
9674 int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
9675 {
9676 int r;
9677
9678 mutex_lock(&kvm->slots_lock);
9679 r = __x86_set_memory_region(kvm, id, gpa, size);
9680 mutex_unlock(&kvm->slots_lock);
9681
9682 return r;
9683 }
9684 EXPORT_SYMBOL_GPL(x86_set_memory_region);
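/*
 * Editor's illustration (not part of the original file):
 * __x86_set_memory_region() is the in-kernel counterpart of what a VMM does
 * with KVM_SET_USER_MEMORY_REGION; both paths end in
 * __kvm_set_memory_region().  A sketch that backs guest RAM at GPA 0 with
 * anonymous memory; vm_fd is an assumed VM descriptor and slot 0 is chosen
 * arbitrarily.
 */
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

static int add_guest_ram(int vm_fd, unsigned long long size)
{
	void *host = mmap(NULL, size, PROT_READ | PROT_WRITE,
			  MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	struct kvm_userspace_memory_region region = {
		.slot = 0,
		.flags = 0,
		.guest_phys_addr = 0,
		.memory_size = size,
		.userspace_addr = (unsigned long)host,
	};

	if (host == MAP_FAILED)
		return -1;
	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}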
9685
9686 void kvm_arch_pre_destroy_vm(struct kvm *kvm)
9687 {
9688 kvm_mmu_pre_destroy_vm(kvm);
9689 }
9690
9691 void kvm_arch_destroy_vm(struct kvm *kvm)
9692 {
9693 if (current->mm == kvm->mm) {
9694 /*
9695 * Free memory regions allocated on behalf of userspace,
9696 * unless the memory map has changed due to process exit
9697 * or fd copying.
9698 */
9699 x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 0, 0);
9700 x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 0, 0);
9701 x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
9702 }
9703 if (kvm_x86_ops->vm_destroy)
9704 kvm_x86_ops->vm_destroy(kvm);
9705 kvm_pic_destroy(kvm);
9706 kvm_ioapic_destroy(kvm);
9707 kvm_free_vcpus(kvm);
9708 kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
9709 kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
9710 kvm_mmu_uninit_vm(kvm);
9711 kvm_page_track_cleanup(kvm);
9712 kvm_hv_destroy_vm(kvm);
9713 }
9714
9715 void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
9716 struct kvm_memory_slot *dont)
9717 {
9718 int i;
9719
9720 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
9721 if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
9722 kvfree(free->arch.rmap[i]);
9723 free->arch.rmap[i] = NULL;
9724 }
9725 if (i == 0)
9726 continue;
9727
9728 if (!dont || free->arch.lpage_info[i - 1] !=
9729 dont->arch.lpage_info[i - 1]) {
9730 kvfree(free->arch.lpage_info[i - 1]);
9731 free->arch.lpage_info[i - 1] = NULL;
9732 }
9733 }
9734
9735 kvm_page_track_free_memslot(free, dont);
9736 }
9737
9738 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
9739 unsigned long npages)
9740 {
9741 int i;
9742
9743
9744
9745
9746
9747
9748 memset(&slot->arch, 0, sizeof(slot->arch));
9749
9750 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
9751 struct kvm_lpage_info *linfo;
9752 unsigned long ugfn;
9753 int lpages;
9754 int level = i + 1;
9755
9756 lpages = gfn_to_index(slot->base_gfn + npages - 1,
9757 slot->base_gfn, level) + 1;
9758
9759 slot->arch.rmap[i] =
9760 kvcalloc(lpages, sizeof(*slot->arch.rmap[i]),
9761 GFP_KERNEL_ACCOUNT);
9762 if (!slot->arch.rmap[i])
9763 goto out_free;
9764 if (i == 0)
9765 continue;
9766
9767 linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT);
9768 if (!linfo)
9769 goto out_free;
9770
9771 slot->arch.lpage_info[i - 1] = linfo;
9772
9773 if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
9774 linfo[0].disallow_lpage = 1;
9775 if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
9776 linfo[lpages - 1].disallow_lpage = 1;
9777 ugfn = slot->userspace_addr >> PAGE_SHIFT;
9778 /*
9779 * If the gfn and userspace address are not aligned wrt each
9780 * other, or if explicitly asked to, disable large page
9781 * support for this slot.
9782 */
9783 if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
9784 !kvm_largepages_enabled()) {
9785 unsigned long j;
9786
9787 for (j = 0; j < lpages; ++j)
9788 linfo[j].disallow_lpage = 1;
9789 }
9790 }
9791
9792 if (kvm_page_track_create_memslot(slot, npages))
9793 goto out_free;
9794
9795 return 0;
9796
9797 out_free:
9798 for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
9799 kvfree(slot->arch.rmap[i]);
9800 slot->arch.rmap[i] = NULL;
9801 if (i == 0)
9802 continue;
9803
9804 kvfree(slot->arch.lpage_info[i - 1]);
9805 slot->arch.lpage_info[i - 1] = NULL;
9806 }
9807 return -ENOMEM;
9808 }
9809
9810 void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
9811 {
9812 struct kvm_vcpu *vcpu;
9813 int i;
9814
9815
9816
9817
9818
9819 kvm_mmu_invalidate_mmio_sptes(kvm, gen);
9820
9821
9822 kvm_for_each_vcpu(i, vcpu, kvm)
9823 kvm_vcpu_kick(vcpu);
9824 }
9825
9826 int kvm_arch_prepare_memory_region(struct kvm *kvm,
9827 struct kvm_memory_slot *memslot,
9828 const struct kvm_userspace_memory_region *mem,
9829 enum kvm_mr_change change)
9830 {
9831 if (change == KVM_MR_MOVE)
9832 return kvm_arch_create_memslot(kvm, memslot,
9833 mem->memory_size >> PAGE_SHIFT);
9834
9835 return 0;
9836 }
9837
9838 static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
9839 struct kvm_memory_slot *new)
9840 {
9841
9842 if (new->flags & KVM_MEM_READONLY) {
9843 kvm_mmu_slot_remove_write_access(kvm, new);
9844 return;
9845 }
9846
9847
9848
9849
9850
9851
9852
9853
9854
9855
9856
9857
9858
9859
9860
9861
9862
9863
9864
9865
9866
9867
9868
9869
9870
9871
9872
9873
9874
9875
9876
9877 if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
9878 if (kvm_x86_ops->slot_enable_log_dirty)
9879 kvm_x86_ops->slot_enable_log_dirty(kvm, new);
9880 else
9881 kvm_mmu_slot_remove_write_access(kvm, new);
9882 } else {
9883 if (kvm_x86_ops->slot_disable_log_dirty)
9884 kvm_x86_ops->slot_disable_log_dirty(kvm, new);
9885 }
9886 }
9887
9888 void kvm_arch_commit_memory_region(struct kvm *kvm,
9889 const struct kvm_userspace_memory_region *mem,
9890 const struct kvm_memory_slot *old,
9891 const struct kvm_memory_slot *new,
9892 enum kvm_mr_change change)
9893 {
9894 if (!kvm->arch.n_requested_mmu_pages)
9895 kvm_mmu_change_mmu_pages(kvm,
9896 kvm_mmu_calculate_default_mmu_pages(kvm));
9897
9898
9899
9900
9901
9902
9903
9904
9905
9906
9907
9908
9909
9910
9911
9912
9913
9914
9915 if (change == KVM_MR_FLAGS_ONLY &&
9916 (old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
9917 !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
9918 kvm_mmu_zap_collapsible_sptes(kvm, new);
9919
9920
9921
9922
9923
9924
9925
9926
9927
9928
9929
9930 if (change != KVM_MR_DELETE)
9931 kvm_mmu_slot_apply_flags(kvm, (struct kvm_memory_slot *) new);
9932 }
9933
9934 void kvm_arch_flush_shadow_all(struct kvm *kvm)
9935 {
9936 kvm_mmu_zap_all(kvm);
9937 }
9938
9939 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
9940 struct kvm_memory_slot *slot)
9941 {
9942 kvm_page_track_flush_slot(kvm, slot);
9943 }
9944
9945 static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
9946 {
9947 return (is_guest_mode(vcpu) &&
9948 kvm_x86_ops->guest_apic_has_interrupt &&
9949 kvm_x86_ops->guest_apic_has_interrupt(vcpu));
9950 }
9951
9952 static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
9953 {
9954 if (!list_empty_careful(&vcpu->async_pf.done))
9955 return true;
9956
9957 if (kvm_apic_has_events(vcpu))
9958 return true;
9959
9960 if (vcpu->arch.pv.pv_unhalted)
9961 return true;
9962
9963 if (vcpu->arch.exception.pending)
9964 return true;
9965
9966 if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
9967 (vcpu->arch.nmi_pending &&
9968 kvm_x86_ops->nmi_allowed(vcpu)))
9969 return true;
9970
9971 if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
9972 (vcpu->arch.smi_pending && !is_smm(vcpu)))
9973 return true;
9974
9975 if (kvm_arch_interrupt_allowed(vcpu) &&
9976 (kvm_cpu_has_interrupt(vcpu) ||
9977 kvm_guest_apic_has_interrupt(vcpu)))
9978 return true;
9979
9980 if (kvm_hv_has_stimer_pending(vcpu))
9981 return true;
9982
9983 return false;
9984 }
9985
9986 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
9987 {
9988 return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
9989 }
9990
9991 bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
9992 {
9993 if (READ_ONCE(vcpu->arch.pv.pv_unhalted))
9994 return true;
9995
9996 if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
9997 kvm_test_request(KVM_REQ_SMI, vcpu) ||
9998 kvm_test_request(KVM_REQ_EVENT, vcpu))
9999 return true;
10000
10001 if (vcpu->arch.apicv_active && kvm_x86_ops->dy_apicv_has_pending_interrupt(vcpu))
10002 return true;
10003
10004 return false;
10005 }
10006
10007 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
10008 {
10009 return vcpu->arch.preempted_in_kernel;
10010 }
10011
10012 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
10013 {
10014 return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
10015 }
10016
10017 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
10018 {
10019 return kvm_x86_ops->interrupt_allowed(vcpu);
10020 }
10021
10022 unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
10023 {
10024 if (is_64_bit_mode(vcpu))
10025 return kvm_rip_read(vcpu);
10026 return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) +
10027 kvm_rip_read(vcpu));
10028 }
10029 EXPORT_SYMBOL_GPL(kvm_get_linear_rip);
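/*
 * Editor's note (not part of the original file): a worked example of the
 * computation above.  Outside 64-bit mode the "linear RIP" is
 * segment-relative: with CS.base = 0xffff0000 and RIP = 0xfff0 (the reset
 * state that __set_sregs() checks for), the value produced, and later
 * compared against singlestep_rip, is
 *
 *     (u32)(0xffff0000 + 0xfff0) = 0xfffffff0
 *
 * which is the familiar reset vector.  In 64-bit mode the raw RIP is used
 * unmodified.
 */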
10030
10031 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
10032 {
10033 return kvm_get_linear_rip(vcpu) == linear_rip;
10034 }
10035 EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
10036
10037 unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
10038 {
10039 unsigned long rflags;
10040
10041 rflags = kvm_x86_ops->get_rflags(vcpu);
10042 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
10043 rflags &= ~X86_EFLAGS_TF;
10044 return rflags;
10045 }
10046 EXPORT_SYMBOL_GPL(kvm_get_rflags);
10047
10048 static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
10049 {
10050 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
10051 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
10052 rflags |= X86_EFLAGS_TF;
10053 kvm_x86_ops->set_rflags(vcpu, rflags);
10054 }
10055
10056 void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
10057 {
10058 __kvm_set_rflags(vcpu, rflags);
10059 kvm_make_request(KVM_REQ_EVENT, vcpu);
10060 }
10061 EXPORT_SYMBOL_GPL(kvm_set_rflags);
10062
10063 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
10064 {
10065 int r;
10066
10067 if ((vcpu->arch.mmu->direct_map != work->arch.direct_map) ||
10068 work->wakeup_all)
10069 return;
10070
10071 r = kvm_mmu_reload(vcpu);
10072 if (unlikely(r))
10073 return;
10074
10075 if (!vcpu->arch.mmu->direct_map &&
10076 work->arch.cr3 != vcpu->arch.mmu->get_cr3(vcpu))
10077 return;
10078
10079 vcpu->arch.mmu->page_fault(vcpu, work->cr2_or_gpa, 0, true);
10080 }
10081
10082 static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
10083 {
10084 return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
10085 }
10086
10087 static inline u32 kvm_async_pf_next_probe(u32 key)
10088 {
10089 return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
10090 }
10091
10092 static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
10093 {
10094 u32 key = kvm_async_pf_hash_fn(gfn);
10095
10096 while (vcpu->arch.apf.gfns[key] != ~0)
10097 key = kvm_async_pf_next_probe(key);
10098
10099 vcpu->arch.apf.gfns[key] = gfn;
10100 }
10101
10102 static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
10103 {
10104 int i;
10105 u32 key = kvm_async_pf_hash_fn(gfn);
10106
10107 for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
10108 (vcpu->arch.apf.gfns[key] != gfn &&
10109 vcpu->arch.apf.gfns[key] != ~0); i++)
10110 key = kvm_async_pf_next_probe(key);
10111
10112 return key;
10113 }
10114
10115 bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
10116 {
10117 return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
10118 }
10119
10120 static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
10121 {
10122 u32 i, j, k;
10123
10124 i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
10125 while (true) {
10126 vcpu->arch.apf.gfns[i] = ~0;
10127 do {
10128 j = kvm_async_pf_next_probe(j);
10129 if (vcpu->arch.apf.gfns[j] == ~0)
10130 return;
10131 k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
10132 /*
10133 * k lies cyclically in ]i,j]
10134 * |    i.k.j |
10135 * |....j i.k.| or  |.k..j i...|
10136 */
10137 } while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
10138 vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
10139 i = j;
10140 }
10141 }
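/*
 * Editor's note (not part of the original file): a worked example of the
 * open-addressing hash above, shrunk to a hypothetical 4-slot table so the
 * indices stay small (the real table has
 * roundup_pow_of_two(ASYNC_PF_PER_VCPU) slots).  Assume hash(A) = 1,
 * hash(B) = 1, hash(C) = 2:
 *
 *     add A -> slot 1            table: [~0,  A, ~0, ~0]
 *     add B -> 1 taken, slot 2   table: [~0,  A,  B, ~0]
 *     add C -> 2 taken, slot 3   table: [~0,  A,  B,  C]
 *
 * del A (i = 1): the do-while keeps advancing j past entries whose home slot
 * k lies cyclically in ]i, j] (those stay reachable without crossing the new
 * hole at i) and stops at the first entry that would become unreachable,
 * which is then shifted back into slot i.  Here B (home 1) moves from slot 2
 * to slot 1, C (home 2) moves from slot 3 to slot 2, and the scan stops at
 * the empty slot 0:
 *
 *     table: [~0,  B,  C, ~0]
 *
 * so every remaining gfn is again reachable by probing from its home slot.
 */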
10142
10143 static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
10144 {
10145
10146 return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val,
10147 sizeof(val));
10148 }
10149
10150 static int apf_get_user(struct kvm_vcpu *vcpu, u32 *val)
10151 {
10152
10153 return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, val,
10154 sizeof(u32));
10155 }
10156
10157 static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
10158 {
10159 if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
10160 return false;
10161
10162 if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
10163 (vcpu->arch.apf.send_user_only &&
10164 kvm_x86_ops->get_cpl(vcpu) == 0))
10165 return false;
10166
10167 return true;
10168 }
10169
10170 bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
10171 {
10172 if (unlikely(!lapic_in_kernel(vcpu) ||
10173 kvm_event_needs_reinjection(vcpu) ||
10174 vcpu->arch.exception.pending))
10175 return false;
10176
10177 if (kvm_hlt_in_guest(vcpu->kvm) && !kvm_can_deliver_async_pf(vcpu))
10178 return false;
10179
10180
10181
10182
10183
10184 return kvm_x86_ops->interrupt_allowed(vcpu);
10185 }
10186
10187 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
10188 struct kvm_async_pf *work)
10189 {
10190 struct x86_exception fault;
10191
10192 trace_kvm_async_pf_not_present(work->arch.token, work->cr2_or_gpa);
10193 kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
10194
10195 if (kvm_can_deliver_async_pf(vcpu) &&
10196 !apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
10197 fault.vector = PF_VECTOR;
10198 fault.error_code_valid = true;
10199 fault.error_code = 0;
10200 fault.nested_page_fault = false;
10201 fault.address = work->arch.token;
10202 fault.async_page_fault = true;
10203 kvm_inject_page_fault(vcpu, &fault);
10204 } else {
10205
10206
10207
10208
10209
10210
10211
10212
10213 kvm_make_request(KVM_REQ_APF_HALT, vcpu);
10214 }
10215 }
10216
10217 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
10218 struct kvm_async_pf *work)
10219 {
10220 struct x86_exception fault;
10221 u32 val;
10222
10223 if (work->wakeup_all)
10224 work->arch.token = ~0;
10225 else
10226 kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
10227 trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa);
10228
10229 if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED &&
10230 !apf_get_user(vcpu, &val)) {
10231 if (val == KVM_PV_REASON_PAGE_NOT_PRESENT &&
10232 vcpu->arch.exception.pending &&
10233 vcpu->arch.exception.nr == PF_VECTOR &&
10234 !apf_put_user(vcpu, 0)) {
10235 vcpu->arch.exception.injected = false;
10236 vcpu->arch.exception.pending = false;
10237 vcpu->arch.exception.nr = 0;
10238 vcpu->arch.exception.has_error_code = false;
10239 vcpu->arch.exception.error_code = 0;
10240 vcpu->arch.exception.has_payload = false;
10241 vcpu->arch.exception.payload = 0;
10242 } else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
10243 fault.vector = PF_VECTOR;
10244 fault.error_code_valid = true;
10245 fault.error_code = 0;
10246 fault.nested_page_fault = false;
10247 fault.address = work->arch.token;
10248 fault.async_page_fault = true;
10249 kvm_inject_page_fault(vcpu, &fault);
10250 }
10251 }
10252 vcpu->arch.apf.halted = false;
10253 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
10254 }
10255
10256 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
10257 {
10258 if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED))
10259 return true;
10260 else
10261 return kvm_can_do_async_pf(vcpu);
10262 }
10263
10264 void kvm_arch_start_assignment(struct kvm *kvm)
10265 {
10266 atomic_inc(&kvm->arch.assigned_device_count);
10267 }
10268 EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);
10269
10270 void kvm_arch_end_assignment(struct kvm *kvm)
10271 {
10272 atomic_dec(&kvm->arch.assigned_device_count);
10273 }
10274 EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
10275
10276 bool kvm_arch_has_assigned_device(struct kvm *kvm)
10277 {
10278 return atomic_read(&kvm->arch.assigned_device_count);
10279 }
10280 EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
10281
10282 void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
10283 {
10284 atomic_inc(&kvm->arch.noncoherent_dma_count);
10285 }
10286 EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);
10287
10288 void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
10289 {
10290 atomic_dec(&kvm->arch.noncoherent_dma_count);
10291 }
10292 EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);
10293
10294 bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
10295 {
10296 return atomic_read(&kvm->arch.noncoherent_dma_count);
10297 }
10298 EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
10299
10300 bool kvm_arch_has_irq_bypass(void)
10301 {
10302 return true;
10303 }
10304
10305 int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
10306 struct irq_bypass_producer *prod)
10307 {
10308 struct kvm_kernel_irqfd *irqfd =
10309 container_of(cons, struct kvm_kernel_irqfd, consumer);
10310
10311 irqfd->producer = prod;
10312
10313 return kvm_x86_ops->update_pi_irte(irqfd->kvm,
10314 prod->irq, irqfd->gsi, 1);
10315 }
10316
10317 void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
10318 struct irq_bypass_producer *prod)
10319 {
10320 int ret;
10321 struct kvm_kernel_irqfd *irqfd =
10322 container_of(cons, struct kvm_kernel_irqfd, consumer);
10323
10324 WARN_ON(irqfd->producer != prod);
10325 irqfd->producer = NULL;
10326
10327
10328
10329
10330
10331
10332
10333 ret = kvm_x86_ops->update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0);
10334 if (ret)
10335 printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
10336 " fails: %d\n", irqfd->consumer.token, ret);
10337 }
10338
10339 int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
10340 uint32_t guest_irq, bool set)
10341 {
10342 return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
10343 }
10344
10345 bool kvm_vector_hashing_enabled(void)
10346 {
10347 return vector_hashing;
10348 }
10349 EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled);
10350
10351 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
10352 {
10353 return (vcpu->arch.msr_kvm_poll_control & 1) == 0;
10354 }
10355 EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
10356
10357
10358 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
10359 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
10360 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
10361 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
10362 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
10363 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr);
10364 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun);
10365 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit);
10366 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject);
10367 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit);
10368 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmenter_failed);
10369 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
10370 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
10371 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
10372 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
10373 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window_update);
10374 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
10375 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
10376 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
10377 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);