This source file includes the following definitions:
- vmx_setup_l1d_flush
- vmentry_l1d_flush_parse
- vmentry_l1d_flush_set
- vmentry_l1d_flush_get
- vmread_error
- vmwrite_error
- vmclear_error
- vmptrld_error
- invvpid_error
- invept_error
- check_ept_pointer_match
- kvm_fill_hv_flush_list_func
- __hv_remote_flush_tlb_with_range
- hv_remote_flush_tlb_with_range
- hv_remote_flush_tlb
- hv_enable_direct_tlbflush
- cpu_has_broken_vmx_preemption_timer
- cpu_need_virtualize_apic_accesses
- report_flexpriority
- __find_msr_index
- find_msr_entry
- loaded_vmcs_init
- crash_vmclear_local_loaded_vmcss
- __loaded_vmcs_clear
- loaded_vmcs_clear
- vmx_segment_cache_test_set
- vmx_read_guest_seg_selector
- vmx_read_guest_seg_base
- vmx_read_guest_seg_limit
- vmx_read_guest_seg_ar
- update_exception_bitmap
- msr_write_intercepted
- clear_atomic_switch_msr_special
- find_msr
- clear_atomic_switch_msr
- add_atomic_switch_msr_special
- add_atomic_switch_msr
- update_transition_efer
- segment_base
- pt_load_msr
- pt_save_msr
- pt_guest_enter
- pt_guest_exit
- vmx_set_host_fs_gs
- vmx_prepare_switch_to_guest
- vmx_prepare_switch_to_host
- vmx_read_guest_kernel_gs_base
- vmx_write_guest_kernel_gs_base
- vmx_vcpu_pi_load
- vmx_vcpu_load_vmcs
- vmx_vcpu_load
- vmx_vcpu_pi_put
- vmx_vcpu_put
- emulation_required
- vmx_get_rflags
- vmx_set_rflags
- vmx_get_interrupt_shadow
- vmx_set_interrupt_shadow
- vmx_rtit_ctl_check
- skip_emulated_instruction
- vmx_clear_hlt
- vmx_queue_exception
- vmx_rdtscp_supported
- vmx_invpcid_supported
- move_msr_up
- setup_msrs
- vmx_read_l1_tsc_offset
- vmx_write_l1_tsc_offset
- nested_vmx_allowed
- vmx_feature_control_msr_valid
- vmx_get_msr_feature
- vmx_get_msr
- vmx_set_msr
- vmx_cache_reg
- cpu_has_kvm_support
- vmx_disabled_by_bios
- kvm_cpu_vmxon
- hardware_enable
- vmclear_local_loaded_vmcss
- kvm_cpu_vmxoff
- hardware_disable
- adjust_vmx_controls
- setup_vmcs_config
- alloc_vmcs_cpu
- free_vmcs
- free_loaded_vmcs
- alloc_loaded_vmcs
- free_kvm_area
- alloc_kvm_area
- fix_pmode_seg
- enter_pmode
- fix_rmode_seg
- enter_rmode
- vmx_set_efer
- enter_lmode
- exit_lmode
- vmx_flush_tlb_gva
- vmx_decache_cr0_guest_bits
- vmx_decache_cr3
- vmx_decache_cr4_guest_bits
- ept_load_pdptrs
- ept_save_pdptrs
- ept_update_paging_mode_cr0
- vmx_set_cr0
- get_ept_level
- construct_eptp
- vmx_set_cr3
- vmx_set_cr4
- vmx_get_segment
- vmx_get_segment_base
- vmx_get_cpl
- vmx_segment_access_rights
- vmx_set_segment
- vmx_get_cs_db_l_bits
- vmx_get_idt
- vmx_set_idt
- vmx_get_gdt
- vmx_set_gdt
- rmode_segment_valid
- code_segment_valid
- stack_segment_valid
- data_segment_valid
- tr_valid
- ldtr_valid
- cs_ss_rpl_check
- guest_state_valid
- init_rmode_tss
- init_rmode_identity_map
- seg_setup
- alloc_apic_access_page
- allocate_vpid
- free_vpid
- vmx_disable_intercept_for_msr
- vmx_enable_intercept_for_msr
- vmx_set_intercept_for_msr
- vmx_msr_bitmap_mode
- vmx_update_msr_bitmap_x2apic
- vmx_update_msr_bitmap
- pt_update_intercept_for_msr
- vmx_get_enable_apicv
- vmx_guest_apic_has_interrupt
- kvm_vcpu_trigger_posted_interrupt
- vmx_deliver_nested_posted_interrupt
- vmx_deliver_posted_interrupt
- vmx_set_constant_host_state
- set_cr4_guest_host_mask
- vmx_pin_based_exec_ctrl
- vmx_refresh_apicv_exec_ctrl
- vmx_exec_control
- vmx_compute_secondary_exec_control
- ept_set_mmio_spte_mask
- vmx_vcpu_setup
- vmx_vcpu_reset
- enable_irq_window
- enable_nmi_window
- vmx_inject_irq
- vmx_inject_nmi
- vmx_get_nmi_mask
- vmx_set_nmi_mask
- vmx_nmi_allowed
- vmx_interrupt_allowed
- vmx_set_tss_addr
- vmx_set_identity_map_addr
- rmode_exception
- handle_rmode_exception
- kvm_machine_check
- handle_machine_check
- handle_exception_nmi
- handle_external_interrupt
- handle_triple_fault
- handle_io
- vmx_patch_hypercall
- handle_set_cr0
- handle_set_cr4
- handle_desc
- handle_cr
- handle_dr
- vmx_get_dr6
- vmx_set_dr6
- vmx_sync_dirty_debug_regs
- vmx_set_dr7
- handle_cpuid
- handle_rdmsr
- handle_wrmsr
- handle_tpr_below_threshold
- handle_interrupt_window
- handle_halt
- handle_vmcall
- handle_invd
- handle_invlpg
- handle_rdpmc
- handle_wbinvd
- handle_xsetbv
- handle_apic_access
- handle_apic_eoi_induced
- handle_apic_write
- handle_task_switch
- handle_ept_violation
- handle_ept_misconfig
- handle_nmi_window
- handle_invalid_guest_state
- grow_ple_window
- shrink_ple_window
- wakeup_handler
- vmx_enable_tdp
- handle_pause
- handle_nop
- handle_mwait
- handle_invalid_op
- handle_monitor_trap
- handle_monitor
- handle_invpcid
- handle_pml_full
- handle_preemption_timer
- handle_vmx_instruction
- handle_encls
- vmx_get_exit_info
- vmx_destroy_pml_buffer
- vmx_flush_pml_buffer
- kvm_flush_pml_buffers
- vmx_dump_sel
- vmx_dump_dtsel
- dump_vmcs
- vmx_handle_exit
- vmx_l1d_flush
- update_cr8_intercept
- vmx_set_virtual_apic_mode
- vmx_set_apic_access_page_addr
- vmx_hwapic_isr_update
- vmx_set_rvi
- vmx_hwapic_irr_update
- vmx_sync_pir_to_irr
- vmx_dy_apicv_has_pending_interrupt
- vmx_load_eoi_exitmap
- vmx_apicv_post_state_restore
- handle_exception_nmi_irqoff
- handle_external_interrupt_irqoff
- vmx_handle_exit_irqoff
- vmx_has_emulated_msr
- vmx_pt_supported
- vmx_recover_nmi_blocking
- __vmx_complete_interrupts
- vmx_complete_interrupts
- vmx_cancel_injection
- atomic_switch_perf_msrs
- atomic_switch_umwait_control_msr
- vmx_update_hv_timer
- vmx_update_host_rsp
- vmx_vcpu_run
- vmx_vm_alloc
- vmx_vm_free
- vmx_free_vcpu
- vmx_create_vcpu
- vmx_vm_init
- vmx_check_processor_compat
- vmx_get_mt_mask
- vmx_get_lpage_level
- vmcs_set_secondary_exec_control
- nested_vmx_cr_fixed1_bits_update
- nested_vmx_entry_exit_ctls_update
- update_intel_pt_cfg
- vmx_cpuid_update
- vmx_set_supported_cpuid
- vmx_request_immediate_exit
- vmx_check_intercept_io
- vmx_check_intercept
- u64_shl_div_u64
- vmx_set_hv_timer
- vmx_cancel_hv_timer
- vmx_sched_in
- vmx_slot_enable_log_dirty
- vmx_slot_disable_log_dirty
- vmx_flush_log_dirty
- vmx_write_pml_buffer
- vmx_enable_log_dirty_pt_masked
- __pi_post_block
- pi_pre_block
- vmx_pre_block
- pi_post_block
- vmx_post_block
- vmx_update_pi_irte
- vmx_setup_mce
- vmx_smi_allowed
- vmx_pre_enter_smm
- vmx_pre_leave_smm
- enable_smi_window
- vmx_need_emulation_on_page_fault
- vmx_apic_init_signal_blocked
- hardware_setup
- hardware_unsetup
- vmx_cleanup_l1d_flush
- vmx_exit
- vmx_init
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 #include <linux/frame.h>
17 #include <linux/highmem.h>
18 #include <linux/hrtimer.h>
19 #include <linux/kernel.h>
20 #include <linux/kvm_host.h>
21 #include <linux/module.h>
22 #include <linux/moduleparam.h>
23 #include <linux/mod_devicetable.h>
24 #include <linux/mm.h>
25 #include <linux/sched.h>
26 #include <linux/sched/smt.h>
27 #include <linux/slab.h>
28 #include <linux/tboot.h>
29 #include <linux/trace_events.h>
30
31 #include <asm/apic.h>
32 #include <asm/asm.h>
33 #include <asm/cpu.h>
34 #include <asm/debugreg.h>
35 #include <asm/desc.h>
36 #include <asm/fpu/internal.h>
37 #include <asm/io.h>
38 #include <asm/irq_remapping.h>
39 #include <asm/kexec.h>
40 #include <asm/perf_event.h>
41 #include <asm/mce.h>
42 #include <asm/mmu_context.h>
43 #include <asm/mshyperv.h>
44 #include <asm/spec-ctrl.h>
45 #include <asm/virtext.h>
46 #include <asm/vmx.h>
47
48 #include "capabilities.h"
49 #include "cpuid.h"
50 #include "evmcs.h"
51 #include "irq.h"
52 #include "kvm_cache_regs.h"
53 #include "lapic.h"
54 #include "mmu.h"
55 #include "nested.h"
56 #include "ops.h"
57 #include "pmu.h"
58 #include "trace.h"
59 #include "vmcs.h"
60 #include "vmcs12.h"
61 #include "vmx.h"
62 #include "x86.h"
63
64 MODULE_AUTHOR("Qumranet");
65 MODULE_LICENSE("GPL");
66
67 static const struct x86_cpu_id vmx_cpu_id[] = {
68 X86_FEATURE_MATCH(X86_FEATURE_VMX),
69 {}
70 };
71 MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
72
73 bool __read_mostly enable_vpid = 1;
74 module_param_named(vpid, enable_vpid, bool, 0444);
75
76 static bool __read_mostly enable_vnmi = 1;
77 module_param_named(vnmi, enable_vnmi, bool, S_IRUGO);
78
79 bool __read_mostly flexpriority_enabled = 1;
80 module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
81
82 bool __read_mostly enable_ept = 1;
83 module_param_named(ept, enable_ept, bool, S_IRUGO);
84
85 bool __read_mostly enable_unrestricted_guest = 1;
86 module_param_named(unrestricted_guest,
87 enable_unrestricted_guest, bool, S_IRUGO);
88
89 bool __read_mostly enable_ept_ad_bits = 1;
90 module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);
91
92 static bool __read_mostly emulate_invalid_guest_state = true;
93 module_param(emulate_invalid_guest_state, bool, S_IRUGO);
94
95 static bool __read_mostly fasteoi = 1;
96 module_param(fasteoi, bool, S_IRUGO);
97
98 bool __read_mostly enable_apicv = 1;
99 module_param(enable_apicv, bool, S_IRUGO);
100
101
102
103
104
105
106 static bool __read_mostly nested = 1;
107 module_param(nested, bool, S_IRUGO);
108
109 static u64 __read_mostly host_xss;
110
111 bool __read_mostly enable_pml = 1;
112 module_param_named(pml, enable_pml, bool, S_IRUGO);
113
114 static bool __read_mostly dump_invalid_vmcs = 0;
115 module_param(dump_invalid_vmcs, bool, 0644);
116
117 #define MSR_BITMAP_MODE_X2APIC 1
118 #define MSR_BITMAP_MODE_X2APIC_APICV 2
119
120 #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
121
122
123 static int __read_mostly cpu_preemption_timer_multi;
124 static bool __read_mostly enable_preemption_timer = 1;
125 #ifdef CONFIG_X86_64
126 module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
127 #endif
128
129 #define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD)
130 #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE
131 #define KVM_VM_CR0_ALWAYS_ON \
132 (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \
133 X86_CR0_WP | X86_CR0_PG | X86_CR0_PE)
134 #define KVM_CR4_GUEST_OWNED_BITS \
135 (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
136 | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD)
137
138 #define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE
139 #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
140 #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
141
142 #define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
143
144 #define MSR_IA32_RTIT_STATUS_MASK (~(RTIT_STATUS_FILTEREN | \
145 RTIT_STATUS_CONTEXTEN | RTIT_STATUS_TRIGGEREN | \
146 RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \
147 RTIT_STATUS_BYTECNT))
148
149 #define MSR_IA32_RTIT_OUTPUT_BASE_MASK \
150 (~((1UL << cpuid_query_maxphyaddr(vcpu)) - 1) | 0x7f)
151
152
153
154
155
156
157
158
159
160
161
162
163 static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP;
164 module_param(ple_gap, uint, 0444);
165
166 static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
167 module_param(ple_window, uint, 0444);
168
169
170 static unsigned int ple_window_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
171 module_param(ple_window_grow, uint, 0444);
172
173
174 static unsigned int ple_window_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
175 module_param(ple_window_shrink, uint, 0444);
176
177
178 static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
179 module_param(ple_window_max, uint, 0444);
180
181
182 int __read_mostly pt_mode = PT_MODE_SYSTEM;
183 module_param(pt_mode, int, S_IRUGO);
184
185 static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
186 static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond);
187 static DEFINE_MUTEX(vmx_l1d_flush_mutex);
188
189
190 static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_L1D_FLUSH_AUTO;
191
192 static const struct {
193 const char *option;
194 bool for_parse;
195 } vmentry_l1d_param[] = {
196 [VMENTER_L1D_FLUSH_AUTO] = {"auto", true},
197 [VMENTER_L1D_FLUSH_NEVER] = {"never", true},
198 [VMENTER_L1D_FLUSH_COND] = {"cond", true},
199 [VMENTER_L1D_FLUSH_ALWAYS] = {"always", true},
200 [VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false},
201 [VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false},
202 };
203
204 #define L1D_CACHE_ORDER 4
205 static void *vmx_l1d_flush_pages;
206
207 static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
208 {
209 struct page *page;
210 unsigned int i;
211
212 if (!boot_cpu_has_bug(X86_BUG_L1TF)) {
213 l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
214 return 0;
215 }
216
217 if (!enable_ept) {
218 l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED;
219 return 0;
220 }
221
222 if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
223 u64 msr;
224
225 rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
226 if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
227 l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
228 return 0;
229 }
230 }
231
232
233 if (l1tf == VMENTER_L1D_FLUSH_AUTO) {
234 switch (l1tf_mitigation) {
235 case L1TF_MITIGATION_OFF:
236 l1tf = VMENTER_L1D_FLUSH_NEVER;
237 break;
238 case L1TF_MITIGATION_FLUSH_NOWARN:
239 case L1TF_MITIGATION_FLUSH:
240 case L1TF_MITIGATION_FLUSH_NOSMT:
241 l1tf = VMENTER_L1D_FLUSH_COND;
242 break;
243 case L1TF_MITIGATION_FULL:
244 case L1TF_MITIGATION_FULL_FORCE:
245 l1tf = VMENTER_L1D_FLUSH_ALWAYS;
246 break;
247 }
248 } else if (l1tf_mitigation == L1TF_MITIGATION_FULL_FORCE) {
249 l1tf = VMENTER_L1D_FLUSH_ALWAYS;
250 }
251
252 if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages &&
253 !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) {
254
255
256
257
258 page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
259 if (!page)
260 return -ENOMEM;
261 vmx_l1d_flush_pages = page_address(page);
262
263
264
265
266
267
268 for (i = 0; i < 1u << L1D_CACHE_ORDER; ++i) {
269 memset(vmx_l1d_flush_pages + i * PAGE_SIZE, i + 1,
270 PAGE_SIZE);
271 }
272 }
273
274 l1tf_vmx_mitigation = l1tf;
275
276 if (l1tf != VMENTER_L1D_FLUSH_NEVER)
277 static_branch_enable(&vmx_l1d_should_flush);
278 else
279 static_branch_disable(&vmx_l1d_should_flush);
280
281 if (l1tf == VMENTER_L1D_FLUSH_COND)
282 static_branch_enable(&vmx_l1d_flush_cond);
283 else
284 static_branch_disable(&vmx_l1d_flush_cond);
285 return 0;
286 }
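/*
 * Summary of the policy chosen above, roughly:
 *  - CPU not affected by L1TF, EPT disabled, or ARCH_CAPABILITIES advertising
 *    SKIP_VMENTRY_L1DFLUSH => no flush is set up ("not required"/"EPT disabled").
 *  - "auto" is mapped from the global l1tf= mitigation: off => never,
 *    flush/flush,nosmt/flush,nowarn => cond, full/full,force => always.
 *  - If the CPU lacks X86_FEATURE_FLUSH_L1D (MSR_IA32_FLUSH_CMD), a 16-page
 *    (64 KiB with 4 KiB pages) buffer is allocated and pre-filled;
 *    vmx_l1d_flush() later walks it to displace the L1D in software.
 *  - vmx_l1d_should_flush enables flushing at all; vmx_l1d_flush_cond makes
 *    it conditional on the per-vCPU l1tf_flush_l1d hint.
 */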
287
288 static int vmentry_l1d_flush_parse(const char *s)
289 {
290 unsigned int i;
291
292 if (s) {
293 for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) {
294 if (vmentry_l1d_param[i].for_parse &&
295 sysfs_streq(s, vmentry_l1d_param[i].option))
296 return i;
297 }
298 }
299 return -EINVAL;
300 }
301
302 static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
303 {
304 int l1tf, ret;
305
306 l1tf = vmentry_l1d_flush_parse(s);
307 if (l1tf < 0)
308 return l1tf;
309
310 if (!boot_cpu_has(X86_BUG_L1TF))
311 return 0;
312
313
314
315
316
317
318
319 if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) {
320 vmentry_l1d_flush_param = l1tf;
321 return 0;
322 }
323
324 mutex_lock(&vmx_l1d_flush_mutex);
325 ret = vmx_setup_l1d_flush(l1tf);
326 mutex_unlock(&vmx_l1d_flush_mutex);
327 return ret;
328 }
329
330 static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
331 {
332 if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param)))
333 return sprintf(s, "???\n");
334
335 return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
336 }
337
338 static const struct kernel_param_ops vmentry_l1d_flush_ops = {
339 .set = vmentry_l1d_flush_set,
340 .get = vmentry_l1d_flush_get,
341 };
342 module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
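/*
 * The policy registered above is writable at run time; for example
 * (illustrative, assuming the module is loaded as kvm_intel):
 *
 *	cat /sys/module/kvm_intel/parameters/vmentry_l1d_flush
 *	echo always > /sys/module/kvm_intel/parameters/vmentry_l1d_flush
 *
 * vmentry_l1d_flush_set() only records the request while l1tf_vmx_mitigation
 * is still VMENTER_L1D_FLUSH_AUTO (i.e. before hardware setup has run) and
 * otherwise re-runs vmx_setup_l1d_flush() under vmx_l1d_flush_mutex.
 */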
343
344 static bool guest_state_valid(struct kvm_vcpu *vcpu);
345 static u32 vmx_segment_access_rights(struct kvm_segment *var);
346 static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
347 u32 msr, int type);
348
349 void vmx_vmexit(void);
350
351 #define vmx_insn_failed(fmt...) \
352 do { \
353 WARN_ONCE(1, fmt); \
354 pr_warn_ratelimited(fmt); \
355 } while (0)
356
357 asmlinkage void vmread_error(unsigned long field, bool fault)
358 {
359 if (fault)
360 kvm_spurious_fault();
361 else
362 vmx_insn_failed("kvm: vmread failed: field=%lx\n", field);
363 }
364
365 noinline void vmwrite_error(unsigned long field, unsigned long value)
366 {
367 vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%d\n",
368 field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
369 }
370
371 noinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr)
372 {
373 vmx_insn_failed("kvm: vmclear failed: %p/%llx\n", vmcs, phys_addr);
374 }
375
376 noinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr)
377 {
378 vmx_insn_failed("kvm: vmptrld failed: %p/%llx\n", vmcs, phys_addr);
379 }
380
381 noinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva)
382 {
383 vmx_insn_failed("kvm: invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n",
384 ext, vpid, gva);
385 }
386
387 noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa)
388 {
389 vmx_insn_failed("kvm: invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n",
390 ext, eptp, gpa);
391 }
392
393 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
394 DEFINE_PER_CPU(struct vmcs *, current_vmcs);
395
396
397
398
399 static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
400
401
402
403
404
405 static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
406 static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
407
408 static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
409 static DEFINE_SPINLOCK(vmx_vpid_lock);
410
411 struct vmcs_config vmcs_config;
412 struct vmx_capability vmx_capability;
413
414 #define VMX_SEGMENT_FIELD(seg) \
415 [VCPU_SREG_##seg] = { \
416 .selector = GUEST_##seg##_SELECTOR, \
417 .base = GUEST_##seg##_BASE, \
418 .limit = GUEST_##seg##_LIMIT, \
419 .ar_bytes = GUEST_##seg##_AR_BYTES, \
420 }
421
422 static const struct kvm_vmx_segment_field {
423 unsigned selector;
424 unsigned base;
425 unsigned limit;
426 unsigned ar_bytes;
427 } kvm_vmx_segment_fields[] = {
428 VMX_SEGMENT_FIELD(CS),
429 VMX_SEGMENT_FIELD(DS),
430 VMX_SEGMENT_FIELD(ES),
431 VMX_SEGMENT_FIELD(FS),
432 VMX_SEGMENT_FIELD(GS),
433 VMX_SEGMENT_FIELD(SS),
434 VMX_SEGMENT_FIELD(TR),
435 VMX_SEGMENT_FIELD(LDTR),
436 };
437
438 u64 host_efer;
439 static unsigned long host_idt_base;
440
441
442
443
444
445
446
447
448 const u32 vmx_msr_index[] = {
449 #ifdef CONFIG_X86_64
450 MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
451 #endif
452 MSR_EFER, MSR_TSC_AUX, MSR_STAR,
453 };
454
455 #if IS_ENABLED(CONFIG_HYPERV)
456 static bool __read_mostly enlightened_vmcs = true;
457 module_param(enlightened_vmcs, bool, 0444);
458
459
460 static void check_ept_pointer_match(struct kvm *kvm)
461 {
462 struct kvm_vcpu *vcpu;
463 u64 tmp_eptp = INVALID_PAGE;
464 int i;
465
466 kvm_for_each_vcpu(i, vcpu, kvm) {
467 if (!VALID_PAGE(tmp_eptp)) {
468 tmp_eptp = to_vmx(vcpu)->ept_pointer;
469 } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) {
470 to_kvm_vmx(kvm)->ept_pointers_match
471 = EPT_POINTERS_MISMATCH;
472 return;
473 }
474 }
475
476 to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH;
477 }
478
479 static int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush,
480 void *data)
481 {
482 struct kvm_tlb_range *range = data;
483
484 return hyperv_fill_flush_guest_mapping_list(flush, range->start_gfn,
485 range->pages);
486 }
487
488 static inline int __hv_remote_flush_tlb_with_range(struct kvm *kvm,
489 struct kvm_vcpu *vcpu, struct kvm_tlb_range *range)
490 {
491 u64 ept_pointer = to_vmx(vcpu)->ept_pointer;
492
493
494
495
496
497
498 if (range)
499 return hyperv_flush_guest_mapping_range(ept_pointer & PAGE_MASK,
500 kvm_fill_hv_flush_list_func, (void *)range);
501 else
502 return hyperv_flush_guest_mapping(ept_pointer & PAGE_MASK);
503 }
504
505 static int hv_remote_flush_tlb_with_range(struct kvm *kvm,
506 struct kvm_tlb_range *range)
507 {
508 struct kvm_vcpu *vcpu;
509 int ret = 0, i;
510
511 spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
512
513 if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK)
514 check_ept_pointer_match(kvm);
515
516 if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) {
517 kvm_for_each_vcpu(i, vcpu, kvm) {
518
519 if (VALID_PAGE(to_vmx(vcpu)->ept_pointer))
520 ret |= __hv_remote_flush_tlb_with_range(
521 kvm, vcpu, range);
522 }
523 } else {
524 ret = __hv_remote_flush_tlb_with_range(kvm,
525 kvm_get_vcpu(kvm, 0), range);
526 }
527
528 spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
529 return ret;
530 }
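/*
 * Hyper-V enlightened remote TLB flush, in short: when every vCPU uses the
 * same EPT pointer (EPT_POINTERS_MATCH), a single hypercall on behalf of
 * vCPU 0 flushes guest mappings for the whole VM; otherwise the flush is
 * issued per vCPU for each valid EPT pointer.  When a range is given, the
 * GFN range is packed into the hypercall list by kvm_fill_hv_flush_list_func().
 */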
531 static int hv_remote_flush_tlb(struct kvm *kvm)
532 {
533 return hv_remote_flush_tlb_with_range(kvm, NULL);
534 }
535
536 static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
537 {
538 struct hv_enlightened_vmcs *evmcs;
539 struct hv_partition_assist_pg **p_hv_pa_pg =
540 &vcpu->kvm->arch.hyperv.hv_pa_pg;
541
542
543
544
545 if (!*p_hv_pa_pg)
546 *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL);
547
548 if (!*p_hv_pa_pg)
549 return -ENOMEM;
550
551 evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs;
552
553 evmcs->partition_assist_page =
554 __pa(*p_hv_pa_pg);
555 evmcs->hv_vm_id = (unsigned long)vcpu->kvm;
556 evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
557
558 return 0;
559 }
560
561 #endif
562
563
564
565
566
567
568 static u32 vmx_preemption_cpu_tfms[] = {
569
570 0x000206E6,
571
572
573
574 0x00020652,
575
576 0x00020655,
577
578
579
580
581
582
583 0x000106E5,
584
585 0x000106A0,
586
587 0x000106A1,
588
589 0x000106A4,
590
591
592
593 0x000106A5,
594
595 0x000306A8,
596 };
597
598 static inline bool cpu_has_broken_vmx_preemption_timer(void)
599 {
600 u32 eax = cpuid_eax(0x00000001), i;
601
602
603 eax &= ~(0x3U << 14 | 0xfU << 28);
604 for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++)
605 if (eax == vmx_preemption_cpu_tfms[i])
606 return true;
607
608 return false;
609 }
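/*
 * The mask above clears CPUID.01H:EAX bits 15:14 and 31:28, which are
 * reserved, so that only the family/model/stepping signature is compared
 * against the erratum list in vmx_preemption_cpu_tfms[].
 */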
610
611 static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
612 {
613 return flexpriority_enabled && lapic_in_kernel(vcpu);
614 }
615
616 static inline bool report_flexpriority(void)
617 {
618 return flexpriority_enabled;
619 }
620
621 static inline int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
622 {
623 int i;
624
625 for (i = 0; i < vmx->nmsrs; ++i)
626 if (vmx_msr_index[vmx->guest_msrs[i].index] == msr)
627 return i;
628 return -1;
629 }
630
631 struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
632 {
633 int i;
634
635 i = __find_msr_index(vmx, msr);
636 if (i >= 0)
637 return &vmx->guest_msrs[i];
638 return NULL;
639 }
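/*
 * guest_msrs[] holds the "shared" (user-return) MSRs listed in
 * vmx_msr_index[]; they are switched lazily via kvm_set_shared_msr() in
 * vmx_prepare_switch_to_guest() rather than through the VMCS.  Illustrative
 * use, mirroring what update_transition_efer() does:
 *
 *	struct shared_msr_entry *e = find_msr_entry(vmx, MSR_EFER);
 *	if (e)
 *		e->data = guest_efer;
 *
 * and the value is written to the CPU on the next switch to the guest.
 */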
640
641 void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
642 {
643 vmcs_clear(loaded_vmcs->vmcs);
644 if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
645 vmcs_clear(loaded_vmcs->shadow_vmcs);
646 loaded_vmcs->cpu = -1;
647 loaded_vmcs->launched = 0;
648 }
649
650 #ifdef CONFIG_KEXEC_CORE
651 static void crash_vmclear_local_loaded_vmcss(void)
652 {
653 int cpu = raw_smp_processor_id();
654 struct loaded_vmcs *v;
655
656 list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
657 loaded_vmcss_on_cpu_link)
658 vmcs_clear(v->vmcs);
659 }
660 #endif
661
662 static void __loaded_vmcs_clear(void *arg)
663 {
664 struct loaded_vmcs *loaded_vmcs = arg;
665 int cpu = raw_smp_processor_id();
666
667 if (loaded_vmcs->cpu != cpu)
668 return;
669 if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
670 per_cpu(current_vmcs, cpu) = NULL;
671
672 vmcs_clear(loaded_vmcs->vmcs);
673 if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
674 vmcs_clear(loaded_vmcs->shadow_vmcs);
675
676 list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
677
678
679
680
681
682
683
684
685 smp_wmb();
686
687 loaded_vmcs->cpu = -1;
688 loaded_vmcs->launched = 0;
689 }
690
691 void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
692 {
693 int cpu = loaded_vmcs->cpu;
694
695 if (cpu != -1)
696 smp_call_function_single(cpu,
697 __loaded_vmcs_clear, loaded_vmcs, 1);
698 }
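/*
 * VMCLEAR has to run on the CPU where the VMCS was last loaded, so
 * loaded_vmcs_clear() sends a synchronous IPI to that CPU and
 * __loaded_vmcs_clear() does the work there: drop current_vmcs if needed,
 * VMCLEAR, unlink from that CPU's loaded_vmcss_on_cpu list, and only then
 * (after the smp_wmb()) publish cpu = -1, which pairs with the smp_rmb() in
 * vmx_vcpu_load_vmcs() when the VMCS is re-added on another CPU.
 */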
699
700 static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
701 unsigned field)
702 {
703 bool ret;
704 u32 mask = 1 << (seg * SEG_FIELD_NR + field);
705
706 if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) {
707 vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS);
708 vmx->segment_cache.bitmask = 0;
709 }
710 ret = vmx->segment_cache.bitmask & mask;
711 vmx->segment_cache.bitmask |= mask;
712 return ret;
713 }
714
715 static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg)
716 {
717 u16 *p = &vmx->segment_cache.seg[seg].selector;
718
719 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL))
720 *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector);
721 return *p;
722 }
723
724 static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg)
725 {
726 ulong *p = &vmx->segment_cache.seg[seg].base;
727
728 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE))
729 *p = vmcs_readl(kvm_vmx_segment_fields[seg].base);
730 return *p;
731 }
732
733 static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg)
734 {
735 u32 *p = &vmx->segment_cache.seg[seg].limit;
736
737 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT))
738 *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit);
739 return *p;
740 }
741
742 static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg)
743 {
744 u32 *p = &vmx->segment_cache.seg[seg].ar;
745
746 if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR))
747 *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes);
748 return *p;
749 }
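/*
 * Segment cache: the helpers above avoid repeated VMREADs of guest segment
 * fields.  vmx_segment_cache_test_set() reports whether a field is already
 * cached and marks it cached; VCPU_EXREG_SEGMENTS in regs_avail tracks
 * whether the cache as a whole is valid, and the per-field bitmask is reset
 * lazily on first use once that bit has been cleared.
 */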
750
751 void update_exception_bitmap(struct kvm_vcpu *vcpu)
752 {
753 u32 eb;
754
755 eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
756 (1u << DB_VECTOR) | (1u << AC_VECTOR);
757
758
759
760
761
762
763 if (enable_vmware_backdoor)
764 eb |= (1u << GP_VECTOR);
765 if ((vcpu->guest_debug &
766 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
767 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
768 eb |= 1u << BP_VECTOR;
769 if (to_vmx(vcpu)->rmode.vm86_active)
770 eb = ~0;
771 if (enable_ept)
772 eb &= ~(1u << PF_VECTOR);
773
774
775
776
777
778
779 if (is_guest_mode(vcpu))
780 eb |= get_vmcs12(vcpu)->exception_bitmap;
781
782 vmcs_write32(EXCEPTION_BITMAP, eb);
783 }
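/*
 * Each bit set in EXCEPTION_BITMAP makes that exception vector trap to the
 * host: the baseline intercepts #PF, #UD, #MC, #DB and #AC; #GP is added for
 * the VMware backdoor, #BP for software-breakpoint debugging, everything is
 * intercepted in emulated real mode, and #PF interception is dropped when
 * EPT handles guest page faults.  In guest mode vmcs12's bitmap is OR'ed in,
 * because an exception wanted by either L0 or L1 must cause a VM-exit.
 */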
784
785
786
787
788 static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
789 {
790 unsigned long *msr_bitmap;
791 int f = sizeof(unsigned long);
792
793 if (!cpu_has_vmx_msr_bitmap())
794 return true;
795
796 msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
797
798 if (msr <= 0x1fff) {
799 return !!test_bit(msr, msr_bitmap + 0x800 / f);
800 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
801 msr &= 0x1fff;
802 return !!test_bit(msr, msr_bitmap + 0xc00 / f);
803 }
804
805 return true;
806 }
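/*
 * MSR bitmap layout (one 4 KiB page, one bit per MSR, set bit = intercept):
 *	0x000: reads,  MSRs 0x00000000 - 0x00001fff ("low")
 *	0x400: reads,  MSRs 0xc0000000 - 0xc0001fff ("high")
 *	0x800: writes, low range
 *	0xc00: writes, high range
 * msr_write_intercepted() only consults the two write halves and treats any
 * MSR outside both ranges as always intercepted.
 */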
807
808 static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
809 unsigned long entry, unsigned long exit)
810 {
811 vm_entry_controls_clearbit(vmx, entry);
812 vm_exit_controls_clearbit(vmx, exit);
813 }
814
815 static int find_msr(struct vmx_msrs *m, unsigned int msr)
816 {
817 unsigned int i;
818
819 for (i = 0; i < m->nr; ++i) {
820 if (m->val[i].index == msr)
821 return i;
822 }
823 return -ENOENT;
824 }
825
826 static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
827 {
828 int i;
829 struct msr_autoload *m = &vmx->msr_autoload;
830
831 switch (msr) {
832 case MSR_EFER:
833 if (cpu_has_load_ia32_efer()) {
834 clear_atomic_switch_msr_special(vmx,
835 VM_ENTRY_LOAD_IA32_EFER,
836 VM_EXIT_LOAD_IA32_EFER);
837 return;
838 }
839 break;
840 case MSR_CORE_PERF_GLOBAL_CTRL:
841 if (cpu_has_load_perf_global_ctrl()) {
842 clear_atomic_switch_msr_special(vmx,
843 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
844 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
845 return;
846 }
847 break;
848 }
849 i = find_msr(&m->guest, msr);
850 if (i < 0)
851 goto skip_guest;
852 --m->guest.nr;
853 m->guest.val[i] = m->guest.val[m->guest.nr];
854 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
855
856 skip_guest:
857 i = find_msr(&m->host, msr);
858 if (i < 0)
859 return;
860
861 --m->host.nr;
862 m->host.val[i] = m->host.val[m->host.nr];
863 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
864 }
865
866 static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
867 unsigned long entry, unsigned long exit,
868 unsigned long guest_val_vmcs, unsigned long host_val_vmcs,
869 u64 guest_val, u64 host_val)
870 {
871 vmcs_write64(guest_val_vmcs, guest_val);
872 if (host_val_vmcs != HOST_IA32_EFER)
873 vmcs_write64(host_val_vmcs, host_val);
874 vm_entry_controls_setbit(vmx, entry);
875 vm_exit_controls_setbit(vmx, exit);
876 }
877
878 static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
879 u64 guest_val, u64 host_val, bool entry_only)
880 {
881 int i, j = 0;
882 struct msr_autoload *m = &vmx->msr_autoload;
883
884 switch (msr) {
885 case MSR_EFER:
886 if (cpu_has_load_ia32_efer()) {
887 add_atomic_switch_msr_special(vmx,
888 VM_ENTRY_LOAD_IA32_EFER,
889 VM_EXIT_LOAD_IA32_EFER,
890 GUEST_IA32_EFER,
891 HOST_IA32_EFER,
892 guest_val, host_val);
893 return;
894 }
895 break;
896 case MSR_CORE_PERF_GLOBAL_CTRL:
897 if (cpu_has_load_perf_global_ctrl()) {
898 add_atomic_switch_msr_special(vmx,
899 VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
900 VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
901 GUEST_IA32_PERF_GLOBAL_CTRL,
902 HOST_IA32_PERF_GLOBAL_CTRL,
903 guest_val, host_val);
904 return;
905 }
906 break;
907 case MSR_IA32_PEBS_ENABLE:
908
909
910
911
912
913 wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
914 }
915
916 i = find_msr(&m->guest, msr);
917 if (!entry_only)
918 j = find_msr(&m->host, msr);
919
920 if ((i < 0 && m->guest.nr == NR_AUTOLOAD_MSRS) ||
921 (j < 0 && m->host.nr == NR_AUTOLOAD_MSRS)) {
922 printk_once(KERN_WARNING "Not enough msr switch entries. "
923 "Can't add msr %x\n", msr);
924 return;
925 }
926 if (i < 0) {
927 i = m->guest.nr++;
928 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
929 }
930 m->guest.val[i].index = msr;
931 m->guest.val[i].value = guest_val;
932
933 if (entry_only)
934 return;
935
936 if (j < 0) {
937 j = m->host.nr++;
938 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
939 }
940 m->host.val[j].index = msr;
941 m->host.val[j].value = host_val;
942 }
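/*
 * The VM-entry MSR-load list (m->guest) and VM-exit MSR-load list (m->host)
 * let the CPU switch an MSR atomically around entry/exit; both are capped at
 * NR_AUTOLOAD_MSRS.  EFER and PERF_GLOBAL_CTRL bypass the lists when the
 * dedicated VMCS entry/exit controls exist (the *_special helpers above).
 * Illustrative call, mirroring the MSR_IA32_XSS handling later in this file:
 *
 *	add_atomic_switch_msr(vmx, MSR_IA32_XSS, guest_xss, host_xss, false);
 */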
943
944 static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
945 {
946 u64 guest_efer = vmx->vcpu.arch.efer;
947 u64 ignore_bits = 0;
948
949
950 if (!enable_ept)
951 guest_efer |= EFER_NX;
952
953
954
955
956 ignore_bits |= EFER_SCE;
957 #ifdef CONFIG_X86_64
958 ignore_bits |= EFER_LMA | EFER_LME;
959
960 if (guest_efer & EFER_LMA)
961 ignore_bits &= ~(u64)EFER_SCE;
962 #endif
963
964
965
966
967
968
969 if (cpu_has_load_ia32_efer() ||
970 (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
971 if (!(guest_efer & EFER_LMA))
972 guest_efer &= ~EFER_LME;
973 if (guest_efer != host_efer)
974 add_atomic_switch_msr(vmx, MSR_EFER,
975 guest_efer, host_efer, false);
976 else
977 clear_atomic_switch_msr(vmx, MSR_EFER);
978 return false;
979 } else {
980 clear_atomic_switch_msr(vmx, MSR_EFER);
981
982 guest_efer &= ~ignore_bits;
983 guest_efer |= host_efer & ignore_bits;
984
985 vmx->guest_msrs[efer_offset].data = guest_efer;
986 vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
987
988 return true;
989 }
990 }
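/*
 * update_transition_efer() returns true when EFER should be switched through
 * the shared-MSR (user-return) path, i.e. guest_msrs[efer_offset]; it returns
 * false when the VMCS EFER load controls or the atomic-switch lists already
 * cover it.  Forcing EFER.NX on when EPT is disabled appears to be needed
 * because the guest then runs on KVM's shadow page tables, which rely on NX.
 */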
991
992 #ifdef CONFIG_X86_32
993
994
995
996
997
998 static unsigned long segment_base(u16 selector)
999 {
1000 struct desc_struct *table;
1001 unsigned long v;
1002
1003 if (!(selector & ~SEGMENT_RPL_MASK))
1004 return 0;
1005
1006 table = get_current_gdt_ro();
1007
1008 if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) {
1009 u16 ldt_selector = kvm_read_ldt();
1010
1011 if (!(ldt_selector & ~SEGMENT_RPL_MASK))
1012 return 0;
1013
1014 table = (struct desc_struct *)segment_base(ldt_selector);
1015 }
1016 v = get_desc_base(&table[selector >> 3]);
1017 return v;
1018 }
1019 #endif
1020
1021 static inline void pt_load_msr(struct pt_ctx *ctx, u32 addr_range)
1022 {
1023 u32 i;
1024
1025 wrmsrl(MSR_IA32_RTIT_STATUS, ctx->status);
1026 wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base);
1027 wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask);
1028 wrmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match);
1029 for (i = 0; i < addr_range; i++) {
1030 wrmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]);
1031 wrmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]);
1032 }
1033 }
1034
1035 static inline void pt_save_msr(struct pt_ctx *ctx, u32 addr_range)
1036 {
1037 u32 i;
1038
1039 rdmsrl(MSR_IA32_RTIT_STATUS, ctx->status);
1040 rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base);
1041 rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask);
1042 rdmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match);
1043 for (i = 0; i < addr_range; i++) {
1044 rdmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]);
1045 rdmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]);
1046 }
1047 }
1048
1049 static void pt_guest_enter(struct vcpu_vmx *vmx)
1050 {
1051 if (pt_mode == PT_MODE_SYSTEM)
1052 return;
1053
1054
1055
1056
1057
1058 rdmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
1059 if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) {
1060 wrmsrl(MSR_IA32_RTIT_CTL, 0);
1061 pt_save_msr(&vmx->pt_desc.host, vmx->pt_desc.addr_range);
1062 pt_load_msr(&vmx->pt_desc.guest, vmx->pt_desc.addr_range);
1063 }
1064 }
1065
1066 static void pt_guest_exit(struct vcpu_vmx *vmx)
1067 {
1068 if (pt_mode == PT_MODE_SYSTEM)
1069 return;
1070
1071 if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) {
1072 pt_save_msr(&vmx->pt_desc.guest, vmx->pt_desc.addr_range);
1073 pt_load_msr(&vmx->pt_desc.host, vmx->pt_desc.addr_range);
1074 }
1075
1076
1077 wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
1078 }
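/*
 * Intel PT host/guest switching (PT_MODE_HOST_GUEST only): the host's
 * RTIT_CTL is saved before entry and restored after exit, and if the guest
 * had TraceEn set, the auxiliary RTIT MSRs (STATUS, OUTPUT_BASE/MASK,
 * CR3_MATCH and the address-range pairs) are swapped by pt_save_msr() /
 * pt_load_msr().  The guest's RTIT_CTL itself is carried in the
 * GUEST_IA32_RTIT_CTL VMCS field (see the MSR_IA32_RTIT_CTL handling in
 * vmx_set_msr()), with tracing stopped across the transition.
 */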
1079
1080 void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
1081 unsigned long fs_base, unsigned long gs_base)
1082 {
1083 if (unlikely(fs_sel != host->fs_sel)) {
1084 if (!(fs_sel & 7))
1085 vmcs_write16(HOST_FS_SELECTOR, fs_sel);
1086 else
1087 vmcs_write16(HOST_FS_SELECTOR, 0);
1088 host->fs_sel = fs_sel;
1089 }
1090 if (unlikely(gs_sel != host->gs_sel)) {
1091 if (!(gs_sel & 7))
1092 vmcs_write16(HOST_GS_SELECTOR, gs_sel);
1093 else
1094 vmcs_write16(HOST_GS_SELECTOR, 0);
1095 host->gs_sel = gs_sel;
1096 }
1097 if (unlikely(fs_base != host->fs_base)) {
1098 vmcs_writel(HOST_FS_BASE, fs_base);
1099 host->fs_base = fs_base;
1100 }
1101 if (unlikely(gs_base != host->gs_base)) {
1102 vmcs_writel(HOST_GS_BASE, gs_base);
1103 host->gs_base = gs_base;
1104 }
1105 }
1106
1107 void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
1108 {
1109 struct vcpu_vmx *vmx = to_vmx(vcpu);
1110 struct vmcs_host_state *host_state;
1111 #ifdef CONFIG_X86_64
1112 int cpu = raw_smp_processor_id();
1113 #endif
1114 unsigned long fs_base, gs_base;
1115 u16 fs_sel, gs_sel;
1116 int i;
1117
1118 vmx->req_immediate_exit = false;
1119
1120
1121
1122
1123
1124
1125 if (!vmx->guest_msrs_ready) {
1126 vmx->guest_msrs_ready = true;
1127 for (i = 0; i < vmx->save_nmsrs; ++i)
1128 kvm_set_shared_msr(vmx->guest_msrs[i].index,
1129 vmx->guest_msrs[i].data,
1130 vmx->guest_msrs[i].mask);
1131
1132 }
1133 if (vmx->guest_state_loaded)
1134 return;
1135
1136 host_state = &vmx->loaded_vmcs->host_state;
1137
1138
1139
1140
1141
1142 host_state->ldt_sel = kvm_read_ldt();
1143
1144 #ifdef CONFIG_X86_64
1145 savesegment(ds, host_state->ds_sel);
1146 savesegment(es, host_state->es_sel);
1147
1148 gs_base = cpu_kernelmode_gs_base(cpu);
1149 if (likely(is_64bit_mm(current->mm))) {
1150 save_fsgs_for_kvm();
1151 fs_sel = current->thread.fsindex;
1152 gs_sel = current->thread.gsindex;
1153 fs_base = current->thread.fsbase;
1154 vmx->msr_host_kernel_gs_base = current->thread.gsbase;
1155 } else {
1156 savesegment(fs, fs_sel);
1157 savesegment(gs, gs_sel);
1158 fs_base = read_msr(MSR_FS_BASE);
1159 vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
1160 }
1161
1162 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
1163 #else
1164 savesegment(fs, fs_sel);
1165 savesegment(gs, gs_sel);
1166 fs_base = segment_base(fs_sel);
1167 gs_base = segment_base(gs_sel);
1168 #endif
1169
1170 vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base);
1171 vmx->guest_state_loaded = true;
1172 }
1173
1174 static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
1175 {
1176 struct vmcs_host_state *host_state;
1177
1178 if (!vmx->guest_state_loaded)
1179 return;
1180
1181 host_state = &vmx->loaded_vmcs->host_state;
1182
1183 ++vmx->vcpu.stat.host_state_reload;
1184
1185 #ifdef CONFIG_X86_64
1186 rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
1187 #endif
1188 if (host_state->ldt_sel || (host_state->gs_sel & 7)) {
1189 kvm_load_ldt(host_state->ldt_sel);
1190 #ifdef CONFIG_X86_64
1191 load_gs_index(host_state->gs_sel);
1192 #else
1193 loadsegment(gs, host_state->gs_sel);
1194 #endif
1195 }
1196 if (host_state->fs_sel & 7)
1197 loadsegment(fs, host_state->fs_sel);
1198 #ifdef CONFIG_X86_64
1199 if (unlikely(host_state->ds_sel | host_state->es_sel)) {
1200 loadsegment(ds, host_state->ds_sel);
1201 loadsegment(es, host_state->es_sel);
1202 }
1203 #endif
1204 invalidate_tss_limit();
1205 #ifdef CONFIG_X86_64
1206 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
1207 #endif
1208 load_fixmap_gdt(raw_smp_processor_id());
1209 vmx->guest_state_loaded = false;
1210 vmx->guest_msrs_ready = false;
1211 }
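/*
 * Host state is switched lazily: vmx_prepare_switch_to_guest() saves the
 * host's FS/GS/LDT selectors and FS/GS bases once per vcpu_load section and
 * installs the guest's MSR_KERNEL_GS_BASE (64-bit), while the restore in
 * vmx_prepare_switch_to_host() is driven from vmx_vcpu_put() rather than
 * from every VM-exit.  guest_msrs_ready similarly gates re-loading the
 * shared (user-return) MSRs after setup_msrs() changes them.
 */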
1212
1213 #ifdef CONFIG_X86_64
1214 static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
1215 {
1216 preempt_disable();
1217 if (vmx->guest_state_loaded)
1218 rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
1219 preempt_enable();
1220 return vmx->msr_guest_kernel_gs_base;
1221 }
1222
1223 static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
1224 {
1225 preempt_disable();
1226 if (vmx->guest_state_loaded)
1227 wrmsrl(MSR_KERNEL_GS_BASE, data);
1228 preempt_enable();
1229 vmx->msr_guest_kernel_gs_base = data;
1230 }
1231 #endif
1232
1233 static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
1234 {
1235 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
1236 struct pi_desc old, new;
1237 unsigned int dest;
1238
1239
1240
1241
1242
1243
1244
1245 if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
1246 return;
1247
1248
1249
1250
1251
1252
1253
1254
1255 if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) {
1256 pi_clear_sn(pi_desc);
1257 goto after_clear_sn;
1258 }
1259
1260
1261 do {
1262 old.control = new.control = pi_desc->control;
1263
1264 dest = cpu_physical_id(cpu);
1265
1266 if (x2apic_enabled())
1267 new.ndst = dest;
1268 else
1269 new.ndst = (dest << 8) & 0xFF00;
1270
1271 new.sn = 0;
1272 } while (cmpxchg64(&pi_desc->control, old.control,
1273 new.control) != old.control);
1274
1275 after_clear_sn:
1276
1277
1278
1279
1280
1281
1282
1283 smp_mb__after_atomic();
1284
1285 if (!pi_is_pir_empty(pi_desc))
1286 pi_set_on(pi_desc);
1287 }
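/*
 * On migration to a new CPU the posted-interrupt descriptor is updated so
 * that devices keep posting to the right place: NDST is rewritten to the new
 * CPU's APIC ID (full ID for x2APIC, bits 15:8 for xAPIC, hence the
 * "(dest << 8) & 0xFF00"), SN is cleared to allow notifications again, and a
 * cmpxchg64 loop is used because hardware may update the descriptor
 * concurrently.  If the PIR is not empty, ON is set afterwards so the
 * pending postings are noticed.
 */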
1288
1289 void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
1290 struct loaded_vmcs *buddy)
1291 {
1292 struct vcpu_vmx *vmx = to_vmx(vcpu);
1293 bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
1294 struct vmcs *prev;
1295
1296 if (!already_loaded) {
1297 loaded_vmcs_clear(vmx->loaded_vmcs);
1298 local_irq_disable();
1299
1300
1301
1302
1303
1304
1305
1306 smp_rmb();
1307
1308 list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
1309 &per_cpu(loaded_vmcss_on_cpu, cpu));
1310 local_irq_enable();
1311 }
1312
1313 prev = per_cpu(current_vmcs, cpu);
1314 if (prev != vmx->loaded_vmcs->vmcs) {
1315 per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
1316 vmcs_load(vmx->loaded_vmcs->vmcs);
1317
1318
1319
1320
1321
1322
1323 if (!buddy || WARN_ON_ONCE(buddy->vmcs != prev))
1324 indirect_branch_prediction_barrier();
1325 }
1326
1327 if (!already_loaded) {
1328 void *gdt = get_current_gdt_ro();
1329 unsigned long sysenter_esp;
1330
1331 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
1332
1333
1334
1335
1336
1337 vmcs_writel(HOST_TR_BASE,
1338 (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
1339 vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt);
1340
1341
1342
1343
1344
1345
1346
1347 BUILD_BUG_ON(IO_BITMAP_OFFSET - 1 != 0x67);
1348
1349 rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
1350 vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp);
1351
1352 vmx->loaded_vmcs->cpu = cpu;
1353 }
1354
1355
1356 if (kvm_has_tsc_control &&
1357 vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
1358 decache_tsc_multiplier(vmx);
1359 }
1360
1361
1362
1363
1364
1365 void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1366 {
1367 struct vcpu_vmx *vmx = to_vmx(vcpu);
1368
1369 vmx_vcpu_load_vmcs(vcpu, cpu, NULL);
1370
1371 vmx_vcpu_pi_load(vcpu, cpu);
1372
1373 vmx->host_debugctlmsr = get_debugctlmsr();
1374 }
1375
1376 static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
1377 {
1378 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
1379
1380 if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
1381 !irq_remapping_cap(IRQ_POSTING_CAP) ||
1382 !kvm_vcpu_apicv_active(vcpu))
1383 return;
1384
1385
1386 if (vcpu->preempted)
1387 pi_set_sn(pi_desc);
1388 }
1389
1390 static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
1391 {
1392 vmx_vcpu_pi_put(vcpu);
1393
1394 vmx_prepare_switch_to_host(to_vmx(vcpu));
1395 }
1396
1397 static bool emulation_required(struct kvm_vcpu *vcpu)
1398 {
1399 return emulate_invalid_guest_state && !guest_state_valid(vcpu);
1400 }
1401
1402 static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
1403
1404 unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
1405 {
1406 unsigned long rflags, save_rflags;
1407
1408 if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) {
1409 __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
1410 rflags = vmcs_readl(GUEST_RFLAGS);
1411 if (to_vmx(vcpu)->rmode.vm86_active) {
1412 rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
1413 save_rflags = to_vmx(vcpu)->rmode.save_rflags;
1414 rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
1415 }
1416 to_vmx(vcpu)->rflags = rflags;
1417 }
1418 return to_vmx(vcpu)->rflags;
1419 }
1420
1421 void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
1422 {
1423 unsigned long old_rflags = vmx_get_rflags(vcpu);
1424
1425 __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
1426 to_vmx(vcpu)->rflags = rflags;
1427 if (to_vmx(vcpu)->rmode.vm86_active) {
1428 to_vmx(vcpu)->rmode.save_rflags = rflags;
1429 rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
1430 }
1431 vmcs_writel(GUEST_RFLAGS, rflags);
1432
1433 if ((old_rflags ^ to_vmx(vcpu)->rflags) & X86_EFLAGS_VM)
1434 to_vmx(vcpu)->emulation_required = emulation_required(vcpu);
1435 }
1436
1437 u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
1438 {
1439 u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
1440 int ret = 0;
1441
1442 if (interruptibility & GUEST_INTR_STATE_STI)
1443 ret |= KVM_X86_SHADOW_INT_STI;
1444 if (interruptibility & GUEST_INTR_STATE_MOV_SS)
1445 ret |= KVM_X86_SHADOW_INT_MOV_SS;
1446
1447 return ret;
1448 }
1449
1450 void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
1451 {
1452 u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
1453 u32 interruptibility = interruptibility_old;
1454
1455 interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
1456
1457 if (mask & KVM_X86_SHADOW_INT_MOV_SS)
1458 interruptibility |= GUEST_INTR_STATE_MOV_SS;
1459 else if (mask & KVM_X86_SHADOW_INT_STI)
1460 interruptibility |= GUEST_INTR_STATE_STI;
1461
1462 if ((interruptibility != interruptibility_old))
1463 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility);
1464 }
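/*
 * The "interrupt shadow" mapped above is the guest interruptibility state:
 * GUEST_INTR_STATE_STI is blocking by STI, GUEST_INTR_STATE_MOV_SS is
 * blocking by MOV/POP SS, and they are translated to and from
 * KVM_X86_SHADOW_INT_STI / KVM_X86_SHADOW_INT_MOV_SS for the rest of KVM.
 */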
1465
1466 static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
1467 {
1468 struct vcpu_vmx *vmx = to_vmx(vcpu);
1469 unsigned long value;
1470
1471
1472
1473
1474
1475 if (data & vmx->pt_desc.ctl_bitmask)
1476 return 1;
1477
1478
1479
1480
1481
1482 if ((vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) &&
1483 ((vmx->pt_desc.guest.ctl ^ data) & ~RTIT_CTL_TRACEEN))
1484 return 1;
1485
1486
1487
1488
1489
1490
1491 if ((data & RTIT_CTL_TRACEEN) && !(data & RTIT_CTL_TOPA) &&
1492 !(data & RTIT_CTL_FABRIC_EN) &&
1493 !intel_pt_validate_cap(vmx->pt_desc.caps,
1494 PT_CAP_single_range_output))
1495 return 1;
1496
1497
1498
1499
1500
1501 value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc_periods);
1502 if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc) &&
1503 !test_bit((data & RTIT_CTL_MTC_RANGE) >>
1504 RTIT_CTL_MTC_RANGE_OFFSET, &value))
1505 return 1;
1506 value = intel_pt_validate_cap(vmx->pt_desc.caps,
1507 PT_CAP_cycle_thresholds);
1508 if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) &&
1509 !test_bit((data & RTIT_CTL_CYC_THRESH) >>
1510 RTIT_CTL_CYC_THRESH_OFFSET, &value))
1511 return 1;
1512 value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_periods);
1513 if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) &&
1514 !test_bit((data & RTIT_CTL_PSB_FREQ) >>
1515 RTIT_CTL_PSB_FREQ_OFFSET, &value))
1516 return 1;
1517
1518
1519
1520
1521
1522 value = (data & RTIT_CTL_ADDR0) >> RTIT_CTL_ADDR0_OFFSET;
1523 if ((value && (vmx->pt_desc.addr_range < 1)) || (value > 2))
1524 return 1;
1525 value = (data & RTIT_CTL_ADDR1) >> RTIT_CTL_ADDR1_OFFSET;
1526 if ((value && (vmx->pt_desc.addr_range < 2)) || (value > 2))
1527 return 1;
1528 value = (data & RTIT_CTL_ADDR2) >> RTIT_CTL_ADDR2_OFFSET;
1529 if ((value && (vmx->pt_desc.addr_range < 3)) || (value > 2))
1530 return 1;
1531 value = (data & RTIT_CTL_ADDR3) >> RTIT_CTL_ADDR3_OFFSET;
1532 if ((value && (vmx->pt_desc.addr_range < 4)) || (value > 2))
1533 return 1;
1534
1535 return 0;
1536 }
1537
1538 static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
1539 {
1540 unsigned long rip;
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550 if (!static_cpu_has(X86_FEATURE_HYPERVISOR) ||
1551 to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) {
1552 rip = kvm_rip_read(vcpu);
1553 rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
1554 kvm_rip_write(vcpu, rip);
1555 } else {
1556 if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
1557 return 0;
1558 }
1559
1560
1561 vmx_set_interrupt_shadow(vcpu, 0);
1562
1563 return 1;
1564 }
1565
1566 static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
1567 {
1568
1569
1570
1571
1572
1573
1574 if (kvm_hlt_in_guest(vcpu->kvm) &&
1575 vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
1576 vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
1577 }
1578
1579 static void vmx_queue_exception(struct kvm_vcpu *vcpu)
1580 {
1581 struct vcpu_vmx *vmx = to_vmx(vcpu);
1582 unsigned nr = vcpu->arch.exception.nr;
1583 bool has_error_code = vcpu->arch.exception.has_error_code;
1584 u32 error_code = vcpu->arch.exception.error_code;
1585 u32 intr_info = nr | INTR_INFO_VALID_MASK;
1586
1587 kvm_deliver_exception_payload(vcpu);
1588
1589 if (has_error_code) {
1590 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
1591 intr_info |= INTR_INFO_DELIVER_CODE_MASK;
1592 }
1593
1594 if (vmx->rmode.vm86_active) {
1595 int inc_eip = 0;
1596 if (kvm_exception_is_soft(nr))
1597 inc_eip = vcpu->arch.event_exit_inst_len;
1598 kvm_inject_realmode_interrupt(vcpu, nr, inc_eip);
1599 return;
1600 }
1601
1602 WARN_ON_ONCE(vmx->emulation_required);
1603
1604 if (kvm_exception_is_soft(nr)) {
1605 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
1606 vmx->vcpu.arch.event_exit_inst_len);
1607 intr_info |= INTR_TYPE_SOFT_EXCEPTION;
1608 } else
1609 intr_info |= INTR_TYPE_HARD_EXCEPTION;
1610
1611 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
1612
1613 vmx_clear_hlt(vcpu);
1614 }
1615
1616 static bool vmx_rdtscp_supported(void)
1617 {
1618 return cpu_has_vmx_rdtscp();
1619 }
1620
1621 static bool vmx_invpcid_supported(void)
1622 {
1623 return cpu_has_vmx_invpcid();
1624 }
1625
1626
1627
1628
1629 static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
1630 {
1631 struct shared_msr_entry tmp;
1632
1633 tmp = vmx->guest_msrs[to];
1634 vmx->guest_msrs[to] = vmx->guest_msrs[from];
1635 vmx->guest_msrs[from] = tmp;
1636 }
1637
1638
1639
1640
1641
1642
1643 static void setup_msrs(struct vcpu_vmx *vmx)
1644 {
1645 int save_nmsrs, index;
1646
1647 save_nmsrs = 0;
1648 #ifdef CONFIG_X86_64
1649
1650
1651
1652
1653 if (is_long_mode(&vmx->vcpu) && (vmx->vcpu.arch.efer & EFER_SCE)) {
1654 index = __find_msr_index(vmx, MSR_STAR);
1655 if (index >= 0)
1656 move_msr_up(vmx, index, save_nmsrs++);
1657 index = __find_msr_index(vmx, MSR_LSTAR);
1658 if (index >= 0)
1659 move_msr_up(vmx, index, save_nmsrs++);
1660 index = __find_msr_index(vmx, MSR_SYSCALL_MASK);
1661 if (index >= 0)
1662 move_msr_up(vmx, index, save_nmsrs++);
1663 }
1664 #endif
1665 index = __find_msr_index(vmx, MSR_EFER);
1666 if (index >= 0 && update_transition_efer(vmx, index))
1667 move_msr_up(vmx, index, save_nmsrs++);
1668 index = __find_msr_index(vmx, MSR_TSC_AUX);
1669 if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))
1670 move_msr_up(vmx, index, save_nmsrs++);
1671
1672 vmx->save_nmsrs = save_nmsrs;
1673 vmx->guest_msrs_ready = false;
1674
1675 if (cpu_has_vmx_msr_bitmap())
1676 vmx_update_msr_bitmap(&vmx->vcpu);
1677 }
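/*
 * setup_msrs() compacts guest_msrs[] so that only the first save_nmsrs
 * entries are actually switched: the SYSCALL MSRs (STAR, LSTAR,
 * SYSCALL_MASK) only for a 64-bit guest with EFER.SCE, EFER only when
 * update_transition_efer() asks for it, and TSC_AUX only when the guest has
 * RDTSCP.  guest_msrs_ready is cleared so the new set is written out on the
 * next switch to the guest, and the MSR bitmap is refreshed.
 */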
1678
1679 static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
1680 {
1681 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
1682
1683 if (is_guest_mode(vcpu) &&
1684 (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING))
1685 return vcpu->arch.tsc_offset - vmcs12->tsc_offset;
1686
1687 return vcpu->arch.tsc_offset;
1688 }
1689
1690 static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
1691 {
1692 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
1693 u64 g_tsc_offset = 0;
1694
1695
1696
1697
1698
1699
1700
1701 if (is_guest_mode(vcpu) &&
1702 (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING))
1703 g_tsc_offset = vmcs12->tsc_offset;
1704
1705 trace_kvm_write_tsc_offset(vcpu->vcpu_id,
1706 vcpu->arch.tsc_offset - g_tsc_offset,
1707 offset);
1708 vmcs_write64(TSC_OFFSET, offset + g_tsc_offset);
1709 return offset + g_tsc_offset;
1710 }
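/*
 * The TSC_OFFSET field holds the total offset applied by hardware.  While L2
 * runs with TSC offsetting enabled in vmcs12, the programmed value is L1's
 * offset plus vmcs12->tsc_offset, so (ignoring TSC scaling):
 *
 *	TSC seen by L2 = host TSC + L1 offset + vmcs12->tsc_offset
 *
 * vmx_write_l1_tsc_offset() takes the desired L1 offset and returns the
 * value it actually wrote to the VMCS.
 */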
1711
1712
1713
1714
1715
1716
1717
1718 bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
1719 {
1720 return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX);
1721 }
1722
1723 static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
1724 uint64_t val)
1725 {
1726 uint64_t valid_bits = to_vmx(vcpu)->msr_ia32_feature_control_valid_bits;
1727
1728 return !(val & ~valid_bits);
1729 }
1730
1731 static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
1732 {
1733 switch (msr->index) {
1734 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
1735 if (!nested)
1736 return 1;
1737 return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
1738 default:
1739 return 1;
1740 }
1741
1742 return 0;
1743 }
1744
1745
1746
1747
1748
1749
1750 static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
1751 {
1752 struct vcpu_vmx *vmx = to_vmx(vcpu);
1753 struct shared_msr_entry *msr;
1754 u32 index;
1755
1756 switch (msr_info->index) {
1757 #ifdef CONFIG_X86_64
1758 case MSR_FS_BASE:
1759 msr_info->data = vmcs_readl(GUEST_FS_BASE);
1760 break;
1761 case MSR_GS_BASE:
1762 msr_info->data = vmcs_readl(GUEST_GS_BASE);
1763 break;
1764 case MSR_KERNEL_GS_BASE:
1765 msr_info->data = vmx_read_guest_kernel_gs_base(vmx);
1766 break;
1767 #endif
1768 case MSR_EFER:
1769 return kvm_get_msr_common(vcpu, msr_info);
1770 case MSR_IA32_UMWAIT_CONTROL:
1771 if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
1772 return 1;
1773
1774 msr_info->data = vmx->msr_ia32_umwait_control;
1775 break;
1776 case MSR_IA32_SPEC_CTRL:
1777 if (!msr_info->host_initiated &&
1778 !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
1779 return 1;
1780
1781 msr_info->data = to_vmx(vcpu)->spec_ctrl;
1782 break;
1783 case MSR_IA32_SYSENTER_CS:
1784 msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
1785 break;
1786 case MSR_IA32_SYSENTER_EIP:
1787 msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP);
1788 break;
1789 case MSR_IA32_SYSENTER_ESP:
1790 msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
1791 break;
1792 case MSR_IA32_BNDCFGS:
1793 if (!kvm_mpx_supported() ||
1794 (!msr_info->host_initiated &&
1795 !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
1796 return 1;
1797 msr_info->data = vmcs_read64(GUEST_BNDCFGS);
1798 break;
1799 case MSR_IA32_MCG_EXT_CTL:
1800 if (!msr_info->host_initiated &&
1801 !(vmx->msr_ia32_feature_control &
1802 FEATURE_CONTROL_LMCE))
1803 return 1;
1804 msr_info->data = vcpu->arch.mcg_ext_ctl;
1805 break;
1806 case MSR_IA32_FEATURE_CONTROL:
1807 msr_info->data = vmx->msr_ia32_feature_control;
1808 break;
1809 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
1810 if (!nested_vmx_allowed(vcpu))
1811 return 1;
1812 return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
1813 &msr_info->data);
1814 case MSR_IA32_XSS:
1815 if (!vmx_xsaves_supported() ||
1816 (!msr_info->host_initiated &&
1817 !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
1818 guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))))
1819 return 1;
1820 msr_info->data = vcpu->arch.ia32_xss;
1821 break;
1822 case MSR_IA32_RTIT_CTL:
1823 if (pt_mode != PT_MODE_HOST_GUEST)
1824 return 1;
1825 msr_info->data = vmx->pt_desc.guest.ctl;
1826 break;
1827 case MSR_IA32_RTIT_STATUS:
1828 if (pt_mode != PT_MODE_HOST_GUEST)
1829 return 1;
1830 msr_info->data = vmx->pt_desc.guest.status;
1831 break;
1832 case MSR_IA32_RTIT_CR3_MATCH:
1833 if ((pt_mode != PT_MODE_HOST_GUEST) ||
1834 !intel_pt_validate_cap(vmx->pt_desc.caps,
1835 PT_CAP_cr3_filtering))
1836 return 1;
1837 msr_info->data = vmx->pt_desc.guest.cr3_match;
1838 break;
1839 case MSR_IA32_RTIT_OUTPUT_BASE:
1840 if ((pt_mode != PT_MODE_HOST_GUEST) ||
1841 (!intel_pt_validate_cap(vmx->pt_desc.caps,
1842 PT_CAP_topa_output) &&
1843 !intel_pt_validate_cap(vmx->pt_desc.caps,
1844 PT_CAP_single_range_output)))
1845 return 1;
1846 msr_info->data = vmx->pt_desc.guest.output_base;
1847 break;
1848 case MSR_IA32_RTIT_OUTPUT_MASK:
1849 if ((pt_mode != PT_MODE_HOST_GUEST) ||
1850 (!intel_pt_validate_cap(vmx->pt_desc.caps,
1851 PT_CAP_topa_output) &&
1852 !intel_pt_validate_cap(vmx->pt_desc.caps,
1853 PT_CAP_single_range_output)))
1854 return 1;
1855 msr_info->data = vmx->pt_desc.guest.output_mask;
1856 break;
1857 case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
1858 index = msr_info->index - MSR_IA32_RTIT_ADDR0_A;
1859 if ((pt_mode != PT_MODE_HOST_GUEST) ||
1860 (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps,
1861 PT_CAP_num_address_ranges)))
1862 return 1;
1863 if (index % 2)
1864 msr_info->data = vmx->pt_desc.guest.addr_b[index / 2];
1865 else
1866 msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
1867 break;
1868 case MSR_TSC_AUX:
1869 if (!msr_info->host_initiated &&
1870 !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
1871 return 1;
1872
1873 default:
1874 msr = find_msr_entry(vmx, msr_info->index);
1875 if (msr) {
1876 msr_info->data = msr->data;
1877 break;
1878 }
1879 return kvm_get_msr_common(vcpu, msr_info);
1880 }
1881
1882 return 0;
1883 }
1884
1885
1886
1887
1888
1889
1890 static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
1891 {
1892 struct vcpu_vmx *vmx = to_vmx(vcpu);
1893 struct shared_msr_entry *msr;
1894 int ret = 0;
1895 u32 msr_index = msr_info->index;
1896 u64 data = msr_info->data;
1897 u32 index;
1898
1899 switch (msr_index) {
1900 case MSR_EFER:
1901 ret = kvm_set_msr_common(vcpu, msr_info);
1902 break;
1903 #ifdef CONFIG_X86_64
1904 case MSR_FS_BASE:
1905 vmx_segment_cache_clear(vmx);
1906 vmcs_writel(GUEST_FS_BASE, data);
1907 break;
1908 case MSR_GS_BASE:
1909 vmx_segment_cache_clear(vmx);
1910 vmcs_writel(GUEST_GS_BASE, data);
1911 break;
1912 case MSR_KERNEL_GS_BASE:
1913 vmx_write_guest_kernel_gs_base(vmx, data);
1914 break;
1915 #endif
1916 case MSR_IA32_SYSENTER_CS:
1917 if (is_guest_mode(vcpu))
1918 get_vmcs12(vcpu)->guest_sysenter_cs = data;
1919 vmcs_write32(GUEST_SYSENTER_CS, data);
1920 break;
1921 case MSR_IA32_SYSENTER_EIP:
1922 if (is_guest_mode(vcpu))
1923 get_vmcs12(vcpu)->guest_sysenter_eip = data;
1924 vmcs_writel(GUEST_SYSENTER_EIP, data);
1925 break;
1926 case MSR_IA32_SYSENTER_ESP:
1927 if (is_guest_mode(vcpu))
1928 get_vmcs12(vcpu)->guest_sysenter_esp = data;
1929 vmcs_writel(GUEST_SYSENTER_ESP, data);
1930 break;
1931 case MSR_IA32_DEBUGCTLMSR:
1932 if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls &
1933 VM_EXIT_SAVE_DEBUG_CONTROLS)
1934 get_vmcs12(vcpu)->guest_ia32_debugctl = data;
1935
1936 ret = kvm_set_msr_common(vcpu, msr_info);
1937 break;
1938
1939 case MSR_IA32_BNDCFGS:
1940 if (!kvm_mpx_supported() ||
1941 (!msr_info->host_initiated &&
1942 !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
1943 return 1;
1944 if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
1945 (data & MSR_IA32_BNDCFGS_RSVD))
1946 return 1;
1947 vmcs_write64(GUEST_BNDCFGS, data);
1948 break;
1949 case MSR_IA32_UMWAIT_CONTROL:
1950 if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
1951 return 1;
1952
1953
1954 if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32)))
1955 return 1;
1956
1957 vmx->msr_ia32_umwait_control = data;
1958 break;
1959 case MSR_IA32_SPEC_CTRL:
1960 if (!msr_info->host_initiated &&
1961 !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
1962 return 1;
1963
1964
1965 if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
1966 return 1;
1967
1968 vmx->spec_ctrl = data;
1969
1970 if (!data)
1971 break;
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985 vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
1986 MSR_IA32_SPEC_CTRL,
1987 MSR_TYPE_RW);
1988 break;
1989 case MSR_IA32_PRED_CMD:
1990 if (!msr_info->host_initiated &&
1991 !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
1992 return 1;
1993
1994 if (data & ~PRED_CMD_IBPB)
1995 return 1;
1996
1997 if (!data)
1998 break;
1999
2000 wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
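/* Likewise, stop intercepting PRED_CMD writes after the first IBPB write. */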
2013 vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
2014 MSR_TYPE_W);
2015 break;
2016 case MSR_IA32_CR_PAT:
2017 if (!kvm_pat_valid(data))
2018 return 1;
2019
2020 if (is_guest_mode(vcpu) &&
2021 get_vmcs12(vcpu)->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
2022 get_vmcs12(vcpu)->guest_ia32_pat = data;
2023
2024 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
2025 vmcs_write64(GUEST_IA32_PAT, data);
2026 vcpu->arch.pat = data;
2027 break;
2028 }
2029 ret = kvm_set_msr_common(vcpu, msr_info);
2030 break;
2031 case MSR_IA32_TSC_ADJUST:
2032 ret = kvm_set_msr_common(vcpu, msr_info);
2033 break;
2034 case MSR_IA32_MCG_EXT_CTL:
2035 if ((!msr_info->host_initiated &&
2036 !(to_vmx(vcpu)->msr_ia32_feature_control &
2037 FEATURE_CONTROL_LMCE)) ||
2038 (data & ~MCG_EXT_CTL_LMCE_EN))
2039 return 1;
2040 vcpu->arch.mcg_ext_ctl = data;
2041 break;
2042 case MSR_IA32_FEATURE_CONTROL:
2043 if (!vmx_feature_control_msr_valid(vcpu, data) ||
2044 (to_vmx(vcpu)->msr_ia32_feature_control &
2045 FEATURE_CONTROL_LOCKED && !msr_info->host_initiated))
2046 return 1;
2047 vmx->msr_ia32_feature_control = data;
2048 if (msr_info->host_initiated && data == 0)
2049 vmx_leave_nested(vcpu);
2050 break;
2051 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
2052 if (!msr_info->host_initiated)
2053 return 1;
2054 if (!nested_vmx_allowed(vcpu))
2055 return 1;
2056 return vmx_set_vmx_msr(vcpu, msr_index, data);
2057 case MSR_IA32_XSS:
2058 if (!vmx_xsaves_supported() ||
2059 (!msr_info->host_initiated &&
2060 !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
2061 guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))))
2062 return 1;
2063
2064
2065
2066
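/* No XSS-controlled features are exposed to the guest, so only 0 is accepted. */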
2067 if (data != 0)
2068 return 1;
2069 vcpu->arch.ia32_xss = data;
2070 if (vcpu->arch.ia32_xss != host_xss)
2071 add_atomic_switch_msr(vmx, MSR_IA32_XSS,
2072 vcpu->arch.ia32_xss, host_xss, false);
2073 else
2074 clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
2075 break;
2076 case MSR_IA32_RTIT_CTL:
2077 if ((pt_mode != PT_MODE_HOST_GUEST) ||
2078 vmx_rtit_ctl_check(vcpu, data) ||
2079 vmx->nested.vmxon)
2080 return 1;
2081 vmcs_write64(GUEST_IA32_RTIT_CTL, data);
2082 vmx->pt_desc.guest.ctl = data;
2083 pt_update_intercept_for_msr(vmx);
2084 break;
2085 case MSR_IA32_RTIT_STATUS:
2086 if ((pt_mode != PT_MODE_HOST_GUEST) ||
2087 (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
2088 (data & MSR_IA32_RTIT_STATUS_MASK))
2089 return 1;
2090 vmx->pt_desc.guest.status = data;
2091 break;
2092 case MSR_IA32_RTIT_CR3_MATCH:
2093 if ((pt_mode != PT_MODE_HOST_GUEST) ||
2094 (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
2095 !intel_pt_validate_cap(vmx->pt_desc.caps,
2096 PT_CAP_cr3_filtering))
2097 return 1;
2098 vmx->pt_desc.guest.cr3_match = data;
2099 break;
2100 case MSR_IA32_RTIT_OUTPUT_BASE:
2101 if ((pt_mode != PT_MODE_HOST_GUEST) ||
2102 (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
2103 (!intel_pt_validate_cap(vmx->pt_desc.caps,
2104 PT_CAP_topa_output) &&
2105 !intel_pt_validate_cap(vmx->pt_desc.caps,
2106 PT_CAP_single_range_output)) ||
2107 (data & MSR_IA32_RTIT_OUTPUT_BASE_MASK))
2108 return 1;
2109 vmx->pt_desc.guest.output_base = data;
2110 break;
2111 case MSR_IA32_RTIT_OUTPUT_MASK:
2112 if ((pt_mode != PT_MODE_HOST_GUEST) ||
2113 (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
2114 (!intel_pt_validate_cap(vmx->pt_desc.caps,
2115 PT_CAP_topa_output) &&
2116 !intel_pt_validate_cap(vmx->pt_desc.caps,
2117 PT_CAP_single_range_output)))
2118 return 1;
2119 vmx->pt_desc.guest.output_mask = data;
2120 break;
2121 case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
2122 index = msr_info->index - MSR_IA32_RTIT_ADDR0_A;
2123 if ((pt_mode != PT_MODE_HOST_GUEST) ||
2124 (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
2125 (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps,
2126 PT_CAP_num_address_ranges)))
2127 return 1;
2128 if (is_noncanonical_address(data, vcpu))
2129 return 1;
2130 if (index % 2)
2131 vmx->pt_desc.guest.addr_b[index / 2] = data;
2132 else
2133 vmx->pt_desc.guest.addr_a[index / 2] = data;
2134 break;
2135 case MSR_TSC_AUX:
2136 if (!msr_info->host_initiated &&
2137 !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
2138 return 1;
2139
2140 if ((data >> 32) != 0)
2141 return 1;
2142
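/* Otherwise fall through to the shared-MSR update below. */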
2143 default:
2144 msr = find_msr_entry(vmx, msr_index);
2145 if (msr) {
2146 u64 old_msr_data = msr->data;
2147 msr->data = data;
2148 if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
2149 preempt_disable();
2150 ret = kvm_set_shared_msr(msr->index, msr->data,
2151 msr->mask);
2152 preempt_enable();
2153 if (ret)
2154 msr->data = old_msr_data;
2155 }
2156 break;
2157 }
2158 ret = kvm_set_msr_common(vcpu, msr_info);
2159 }
2160
2161 return ret;
2162 }
2163
2164 static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
2165 {
2166 __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
2167 switch (reg) {
2168 case VCPU_REGS_RSP:
2169 vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
2170 break;
2171 case VCPU_REGS_RIP:
2172 vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP);
2173 break;
2174 case VCPU_EXREG_PDPTR:
2175 if (enable_ept)
2176 ept_save_pdptrs(vcpu);
2177 break;
2178 default:
2179 break;
2180 }
2181 }
2182
2183 static __init int cpu_has_kvm_support(void)
2184 {
2185 return cpu_has_vmx();
2186 }
2187
2188 static __init int vmx_disabled_by_bios(void)
2189 {
2190 u64 msr;
2191
2192 rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
2193 if (msr & FEATURE_CONTROL_LOCKED) {
2194
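/*
 * With the feature-control MSR locked, VMX is usable only if firmware enabled
 * VMXON for the current environment (inside SMX when TXT/tboot is active,
 * outside SMX otherwise).
 */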
2195 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
2196 && tboot_enabled())
2197 return 1;
2198
2199 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
2200 && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
2201 && !tboot_enabled()) {
2202 printk(KERN_WARNING "kvm: disable TXT in the BIOS or "
2203 "activate TXT before enabling KVM\n");
2204 return 1;
2205 }
2206
2207 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
2208 && !tboot_enabled())
2209 return 1;
2210 }
2211
2212 return 0;
2213 }
2214
2215 static void kvm_cpu_vmxon(u64 addr)
2216 {
2217 cr4_set_bits(X86_CR4_VMXE);
2218 intel_pt_handle_vmx(1);
2219
2220 asm volatile ("vmxon %0" : : "m"(addr));
2221 }
2222
2223 static int hardware_enable(void)
2224 {
2225 int cpu = raw_smp_processor_id();
2226 u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
2227 u64 old, test_bits;
2228
2229 if (cr4_read_shadow() & X86_CR4_VMXE)
2230 return -EBUSY;
2231
2232
2233
2234
2235
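/*
 * With enlightened VMCS, each CPU needs a Hyper-V VP assist page; bail out
 * if one is not available for this CPU.
 */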
2236 if (static_branch_unlikely(&enable_evmcs) &&
2237 !hv_get_vp_assist_page(cpu))
2238 return -EFAULT;
2239
2240 rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
2241
2242 test_bits = FEATURE_CONTROL_LOCKED;
2243 test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
2244 if (tboot_enabled())
2245 test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX;
2246
2247 if ((old & test_bits) != test_bits) {
2248
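/* Enable VMXON and lock the feature-control MSR if firmware left it open. */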
2249 wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
2250 }
2251 kvm_cpu_vmxon(phys_addr);
2252 if (enable_ept)
2253 ept_sync_global();
2254
2255 return 0;
2256 }
2257
2258 static void vmclear_local_loaded_vmcss(void)
2259 {
2260 int cpu = raw_smp_processor_id();
2261 struct loaded_vmcs *v, *n;
2262
2263 list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu),
2264 loaded_vmcss_on_cpu_link)
2265 __loaded_vmcs_clear(v);
2266 }
2267
2268
2269
2270
2271
2272 static void kvm_cpu_vmxoff(void)
2273 {
2274 asm volatile (__ex("vmxoff"));
2275
2276 intel_pt_handle_vmx(0);
2277 cr4_clear_bits(X86_CR4_VMXE);
2278 }
2279
2280 static void hardware_disable(void)
2281 {
2282 vmclear_local_loaded_vmcss();
2283 kvm_cpu_vmxoff();
2284 }
2285
2286 static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
2287 u32 msr, u32 *result)
2288 {
2289 u32 vmx_msr_low, vmx_msr_high;
2290 u32 ctl = ctl_min | ctl_opt;
2291
2292 rdmsr(msr, vmx_msr_low, vmx_msr_high);
2293
2294 ctl &= vmx_msr_high;
2295 ctl |= vmx_msr_low;
2296
2297
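/* All required (minimum) control bits must be supported by hardware. */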
2298 if (ctl_min & ~ctl)
2299 return -EIO;
2300
2301 *result = ctl;
2302 return 0;
2303 }
2304
2305 static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
2306 struct vmx_capability *vmx_cap)
2307 {
2308 u32 vmx_msr_low, vmx_msr_high;
2309 u32 min, opt, min2, opt2;
2310 u32 _pin_based_exec_control = 0;
2311 u32 _cpu_based_exec_control = 0;
2312 u32 _cpu_based_2nd_exec_control = 0;
2313 u32 _vmexit_control = 0;
2314 u32 _vmentry_control = 0;
2315
2316 memset(vmcs_conf, 0, sizeof(*vmcs_conf));
2317 min = CPU_BASED_HLT_EXITING |
2318 #ifdef CONFIG_X86_64
2319 CPU_BASED_CR8_LOAD_EXITING |
2320 CPU_BASED_CR8_STORE_EXITING |
2321 #endif
2322 CPU_BASED_CR3_LOAD_EXITING |
2323 CPU_BASED_CR3_STORE_EXITING |
2324 CPU_BASED_UNCOND_IO_EXITING |
2325 CPU_BASED_MOV_DR_EXITING |
2326 CPU_BASED_USE_TSC_OFFSETING |
2327 CPU_BASED_MWAIT_EXITING |
2328 CPU_BASED_MONITOR_EXITING |
2329 CPU_BASED_INVLPG_EXITING |
2330 CPU_BASED_RDPMC_EXITING;
2331
2332 opt = CPU_BASED_TPR_SHADOW |
2333 CPU_BASED_USE_MSR_BITMAPS |
2334 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
2335 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
2336 &_cpu_based_exec_control) < 0)
2337 return -EIO;
2338 #ifdef CONFIG_X86_64
2339 if ((_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
2340 _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING &
2341 ~CPU_BASED_CR8_STORE_EXITING;
2342 #endif
2343 if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
2344 min2 = 0;
2345 opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
2346 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
2347 SECONDARY_EXEC_WBINVD_EXITING |
2348 SECONDARY_EXEC_ENABLE_VPID |
2349 SECONDARY_EXEC_ENABLE_EPT |
2350 SECONDARY_EXEC_UNRESTRICTED_GUEST |
2351 SECONDARY_EXEC_PAUSE_LOOP_EXITING |
2352 SECONDARY_EXEC_DESC |
2353 SECONDARY_EXEC_RDTSCP |
2354 SECONDARY_EXEC_ENABLE_INVPCID |
2355 SECONDARY_EXEC_APIC_REGISTER_VIRT |
2356 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
2357 SECONDARY_EXEC_SHADOW_VMCS |
2358 SECONDARY_EXEC_XSAVES |
2359 SECONDARY_EXEC_RDSEED_EXITING |
2360 SECONDARY_EXEC_RDRAND_EXITING |
2361 SECONDARY_EXEC_ENABLE_PML |
2362 SECONDARY_EXEC_TSC_SCALING |
2363 SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
2364 SECONDARY_EXEC_PT_USE_GPA |
2365 SECONDARY_EXEC_PT_CONCEAL_VMX |
2366 SECONDARY_EXEC_ENABLE_VMFUNC |
2367 SECONDARY_EXEC_ENCLS_EXITING;
2368 if (adjust_vmx_controls(min2, opt2,
2369 MSR_IA32_VMX_PROCBASED_CTLS2,
2370 &_cpu_based_2nd_exec_control) < 0)
2371 return -EIO;
2372 }
2373 #ifndef CONFIG_X86_64
2374 if (!(_cpu_based_2nd_exec_control &
2375 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
2376 _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
2377 #endif
2378
2379 if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
2380 _cpu_based_2nd_exec_control &= ~(
2381 SECONDARY_EXEC_APIC_REGISTER_VIRT |
2382 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
2383 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
2384
2385 rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP,
2386 &vmx_cap->ept, &vmx_cap->vpid);
2387
2388 if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
2389
2390
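/* With EPT enabled, CR3 accesses and INVLPG need not cause VM exits. */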
2391 _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
2392 CPU_BASED_CR3_STORE_EXITING |
2393 CPU_BASED_INVLPG_EXITING);
2394 } else if (vmx_cap->ept) {
2395 vmx_cap->ept = 0;
2396 pr_warn_once("EPT capability reported even though the "
2397 "'enable EPT' VM-execution control cannot be set\n");
2398 }
2399 if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) &&
2400 vmx_cap->vpid) {
2401 vmx_cap->vpid = 0;
2402 pr_warn_once("VPID capability reported even though the "
2403 "'enable VPID' VM-execution control cannot be set\n");
2404 }
2405
2406 min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT;
2407 #ifdef CONFIG_X86_64
2408 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
2409 #endif
2410 opt = VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
2411 VM_EXIT_LOAD_IA32_PAT |
2412 VM_EXIT_LOAD_IA32_EFER |
2413 VM_EXIT_CLEAR_BNDCFGS |
2414 VM_EXIT_PT_CONCEAL_PIP |
2415 VM_EXIT_CLEAR_IA32_RTIT_CTL;
2416 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
2417 &_vmexit_control) < 0)
2418 return -EIO;
2419
2420 min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
2421 opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
2422 PIN_BASED_VMX_PREEMPTION_TIMER;
2423 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
2424 &_pin_based_exec_control) < 0)
2425 return -EIO;
2426
2427 if (cpu_has_broken_vmx_preemption_timer())
2428 _pin_based_exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
2429 if (!(_cpu_based_2nd_exec_control &
2430 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY))
2431 _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
2432
2433 min = VM_ENTRY_LOAD_DEBUG_CONTROLS;
2434 opt = VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |
2435 VM_ENTRY_LOAD_IA32_PAT |
2436 VM_ENTRY_LOAD_IA32_EFER |
2437 VM_ENTRY_LOAD_BNDCFGS |
2438 VM_ENTRY_PT_CONCEAL_PIP |
2439 VM_ENTRY_LOAD_IA32_RTIT_CTL;
2440 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
2441 &_vmentry_control) < 0)
2442 return -EIO;
2443
2444
2445
2446
2447
2448
2449
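/*
 * The VM-entry/VM-exit PERF_GLOBAL_CTRL load controls do not work properly
 * on the family-6 models listed below, so clear them there (see the warning
 * message); presumably the MSR autoload path is used instead.
 */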
2450 if (boot_cpu_data.x86 == 0x6) {
2451 switch (boot_cpu_data.x86_model) {
2452 case 26:
2453 case 30:
2454 case 37:
2455 case 44:
2456 case 46:
2457 _vmentry_control &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
2458 _vmexit_control &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
2459 pr_warn_once("kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
2460 "does not work properly. Using workaround\n");
2461 break;
2462 default:
2463 break;
2464 }
2465 }
2466
2467
2468 rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);
2469
2470
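/* Bits 44:32 of IA32_VMX_BASIC give the VMCS region size; it must fit in a page. */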
2471 if ((vmx_msr_high & 0x1fff) > PAGE_SIZE)
2472 return -EIO;
2473
2474 #ifdef CONFIG_X86_64
2475
2476 if (vmx_msr_high & (1u<<16))
2477 return -EIO;
2478 #endif
2479
2480
2481 if (((vmx_msr_high >> 18) & 15) != 6)
2482 return -EIO;
2483
2484 vmcs_conf->size = vmx_msr_high & 0x1fff;
2485 vmcs_conf->order = get_order(vmcs_conf->size);
2486 vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
2487
2488 vmcs_conf->revision_id = vmx_msr_low;
2489
2490 vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
2491 vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
2492 vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control;
2493 vmcs_conf->vmexit_ctrl = _vmexit_control;
2494 vmcs_conf->vmentry_ctrl = _vmentry_control;
2495
2496 if (static_branch_unlikely(&enable_evmcs))
2497 evmcs_sanitize_exec_ctrls(vmcs_conf);
2498
2499 return 0;
2500 }
2501
2502 struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
2503 {
2504 int node = cpu_to_node(cpu);
2505 struct page *pages;
2506 struct vmcs *vmcs;
2507
2508 pages = __alloc_pages_node(node, flags, vmcs_config.order);
2509 if (!pages)
2510 return NULL;
2511 vmcs = page_address(pages);
2512 memset(vmcs, 0, vmcs_config.size);
2513
2514
2515 if (static_branch_unlikely(&enable_evmcs))
2516 vmcs->hdr.revision_id = KVM_EVMCS_VERSION;
2517 else
2518 vmcs->hdr.revision_id = vmcs_config.revision_id;
2519
2520 if (shadow)
2521 vmcs->hdr.shadow_vmcs = 1;
2522 return vmcs;
2523 }
2524
2525 void free_vmcs(struct vmcs *vmcs)
2526 {
2527 free_pages((unsigned long)vmcs, vmcs_config.order);
2528 }
2529
2530
2531
2532
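/*
 * Free a loaded_vmcs: VMCLEAR it wherever it may still be active, then
 * release the VMCS page and its MSR bitmap.
 */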
2533 void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
2534 {
2535 if (!loaded_vmcs->vmcs)
2536 return;
2537 loaded_vmcs_clear(loaded_vmcs);
2538 free_vmcs(loaded_vmcs->vmcs);
2539 loaded_vmcs->vmcs = NULL;
2540 if (loaded_vmcs->msr_bitmap)
2541 free_page((unsigned long)loaded_vmcs->msr_bitmap);
2542 WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
2543 }
2544
2545 int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
2546 {
2547 loaded_vmcs->vmcs = alloc_vmcs(false);
2548 if (!loaded_vmcs->vmcs)
2549 return -ENOMEM;
2550
2551 loaded_vmcs->shadow_vmcs = NULL;
2552 loaded_vmcs->hv_timer_soft_disabled = false;
2553 loaded_vmcs_init(loaded_vmcs);
2554
2555 if (cpu_has_vmx_msr_bitmap()) {
2556 loaded_vmcs->msr_bitmap = (unsigned long *)
2557 __get_free_page(GFP_KERNEL_ACCOUNT);
2558 if (!loaded_vmcs->msr_bitmap)
2559 goto out_vmcs;
2560 memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
2561
2562 if (IS_ENABLED(CONFIG_HYPERV) &&
2563 static_branch_unlikely(&enable_evmcs) &&
2564 (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
2565 struct hv_enlightened_vmcs *evmcs =
2566 (struct hv_enlightened_vmcs *)loaded_vmcs->vmcs;
2567
2568 evmcs->hv_enlightenments_control.msr_bitmap = 1;
2569 }
2570 }
2571
2572 memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state));
2573 memset(&loaded_vmcs->controls_shadow, 0,
2574 sizeof(struct vmcs_controls_shadow));
2575
2576 return 0;
2577
2578 out_vmcs:
2579 free_loaded_vmcs(loaded_vmcs);
2580 return -ENOMEM;
2581 }
2582
2583 static void free_kvm_area(void)
2584 {
2585 int cpu;
2586
2587 for_each_possible_cpu(cpu) {
2588 free_vmcs(per_cpu(vmxarea, cpu));
2589 per_cpu(vmxarea, cpu) = NULL;
2590 }
2591 }
2592
2593 static __init int alloc_kvm_area(void)
2594 {
2595 int cpu;
2596
2597 for_each_possible_cpu(cpu) {
2598 struct vmcs *vmcs;
2599
2600 vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL);
2601 if (!vmcs) {
2602 free_kvm_area();
2603 return -ENOMEM;
2604 }
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
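/*
 * vmxarea is used as the VMXON region, which needs the revision id reported
 * by the CPU; undo the KVM_EVMCS_VERSION value written by alloc_vmcs_cpu()
 * above when enlightened VMCS is in use.
 */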
2616 if (static_branch_unlikely(&enable_evmcs))
2617 vmcs->hdr.revision_id = vmcs_config.revision_id;
2618
2619 per_cpu(vmxarea, cpu) = vmcs;
2620 }
2621 return 0;
2622 }
2623
2624 static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
2625 struct kvm_segment *save)
2626 {
2627 if (!emulate_invalid_guest_state) {
2628
2629
2630
2631
2632
2633
2634
2635 if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
2636 save->selector &= ~SEGMENT_RPL_MASK;
2637 save->dpl = save->selector & SEGMENT_RPL_MASK;
2638 save->s = 1;
2639 }
2640 vmx_set_segment(vcpu, save, seg);
2641 }
2642
2643 static void enter_pmode(struct kvm_vcpu *vcpu)
2644 {
2645 unsigned long flags;
2646 struct vcpu_vmx *vmx = to_vmx(vcpu);
2647
2648
2649
2650
2651
2652 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
2653 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
2654 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
2655 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
2656 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
2657 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
2658
2659 vmx->rmode.vm86_active = 0;
2660
2661 vmx_segment_cache_clear(vmx);
2662
2663 vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
2664
2665 flags = vmcs_readl(GUEST_RFLAGS);
2666 flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
2667 flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
2668 vmcs_writel(GUEST_RFLAGS, flags);
2669
2670 vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
2671 (vmcs_readl(CR4_READ_SHADOW) & X86_CR4_VME));
2672
2673 update_exception_bitmap(vcpu);
2674
2675 fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
2676 fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
2677 fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
2678 fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
2679 fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
2680 fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
2681 }
2682
2683 static void fix_rmode_seg(int seg, struct kvm_segment *save)
2684 {
2685 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
2686 struct kvm_segment var = *save;
2687
2688 var.dpl = 0x3;
2689 if (seg == VCPU_SREG_CS)
2690 var.type = 0x3;
2691
2692 if (!emulate_invalid_guest_state) {
2693 var.selector = var.base >> 4;
2694 var.base = var.base & 0xffff0;
2695 var.limit = 0xffff;
2696 var.g = 0;
2697 var.db = 0;
2698 var.present = 1;
2699 var.s = 1;
2700 var.l = 0;
2701 var.unusable = 0;
2702 var.type = 0x3;
2703 var.avl = 0;
2704 if (save->base & 0xf)
2705 printk_once(KERN_WARNING "kvm: segment base is not "
2706 "paragraph aligned when entering "
2707 "protected mode (seg=%d)", seg);
2708 }
2709
2710 vmcs_write16(sf->selector, var.selector);
2711 vmcs_writel(sf->base, var.base);
2712 vmcs_write32(sf->limit, var.limit);
2713 vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var));
2714 }
2715
2716 static void enter_rmode(struct kvm_vcpu *vcpu)
2717 {
2718 unsigned long flags;
2719 struct vcpu_vmx *vmx = to_vmx(vcpu);
2720 struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
2721
2722 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
2723 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
2724 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
2725 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
2726 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
2727 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
2728 vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
2729
2730 vmx->rmode.vm86_active = 1;
2731
2732
2733
2734
2735
2736 if (!kvm_vmx->tss_addr)
2737 printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR needs to be "
2738 "called before entering vcpu\n");
2739
2740 vmx_segment_cache_clear(vmx);
2741
2742 vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr);
2743 vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
2744 vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
2745
2746 flags = vmcs_readl(GUEST_RFLAGS);
2747 vmx->rmode.save_rflags = flags;
2748
2749 flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
2750
2751 vmcs_writel(GUEST_RFLAGS, flags);
2752 vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
2753 update_exception_bitmap(vcpu);
2754
2755 fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
2756 fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
2757 fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
2758 fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
2759 fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
2760 fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
2761
2762 kvm_mmu_reset_context(vcpu);
2763 }
2764
2765 void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
2766 {
2767 struct vcpu_vmx *vmx = to_vmx(vcpu);
2768 struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
2769
2770 if (!msr)
2771 return;
2772
2773 vcpu->arch.efer = efer;
2774 if (efer & EFER_LMA) {
2775 vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
2776 msr->data = efer;
2777 } else {
2778 vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
2779
2780 msr->data = efer & ~EFER_LME;
2781 }
2782 setup_msrs(vmx);
2783 }
2784
2785 #ifdef CONFIG_X86_64
2786
2787 static void enter_lmode(struct kvm_vcpu *vcpu)
2788 {
2789 u32 guest_tr_ar;
2790
2791 vmx_segment_cache_clear(to_vmx(vcpu));
2792
2793 guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
2794 if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) {
2795 pr_debug_ratelimited("%s: tss fixup for long mode. \n",
2796 __func__);
2797 vmcs_write32(GUEST_TR_AR_BYTES,
2798 (guest_tr_ar & ~VMX_AR_TYPE_MASK)
2799 | VMX_AR_TYPE_BUSY_64_TSS);
2800 }
2801 vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA);
2802 }
2803
2804 static void exit_lmode(struct kvm_vcpu *vcpu)
2805 {
2806 vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
2807 vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA);
2808 }
2809
2810 #endif
2811
2812 static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
2813 {
2814 int vpid = to_vmx(vcpu)->vpid;
2815
2816 if (!vpid_sync_vcpu_addr(vpid, addr))
2817 vpid_sync_context(vpid);
2818
2819
2820
2821
2822
2823
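/*
 * If VPID is not supported or not enabled for this vCPU, the calls above end
 * up doing nothing, which is fine: with VPID 0 every VM entry/exit already
 * flushes the TLB.
 */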
2824 }
2825
2826 static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
2827 {
2828 ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
2829
2830 vcpu->arch.cr0 &= ~cr0_guest_owned_bits;
2831 vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
2832 }
2833
2834 static void vmx_decache_cr3(struct kvm_vcpu *vcpu)
2835 {
2836 if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu)))
2837 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
2838 __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
2839 }
2840
2841 static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
2842 {
2843 ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
2844
2845 vcpu->arch.cr4 &= ~cr4_guest_owned_bits;
2846 vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits;
2847 }
2848
2849 static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
2850 {
2851 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
2852
2853 if (!test_bit(VCPU_EXREG_PDPTR,
2854 (unsigned long *)&vcpu->arch.regs_dirty))
2855 return;
2856
2857 if (is_pae_paging(vcpu)) {
2858 vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]);
2859 vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]);
2860 vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]);
2861 vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]);
2862 }
2863 }
2864
2865 void ept_save_pdptrs(struct kvm_vcpu *vcpu)
2866 {
2867 struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
2868
2869 if (is_pae_paging(vcpu)) {
2870 mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
2871 mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
2872 mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
2873 mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
2874 }
2875
2876 __set_bit(VCPU_EXREG_PDPTR,
2877 (unsigned long *)&vcpu->arch.regs_avail);
2878 __set_bit(VCPU_EXREG_PDPTR,
2879 (unsigned long *)&vcpu->arch.regs_dirty);
2880 }
2881
2882 static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
2883 unsigned long cr0,
2884 struct kvm_vcpu *vcpu)
2885 {
2886 struct vcpu_vmx *vmx = to_vmx(vcpu);
2887
2888 if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
2889 vmx_decache_cr3(vcpu);
2890 if (!(cr0 & X86_CR0_PG)) {
2891
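/* Guest turned paging off: intercept CR3 accesses so KVM can track them. */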
2892 exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
2893 CPU_BASED_CR3_STORE_EXITING);
2894 vcpu->arch.cr0 = cr0;
2895 vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
2896 } else if (!is_paging(vcpu)) {
2897
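/* Guest turned paging back on: CR3 accesses no longer need to exit. */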
2898 exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
2899 CPU_BASED_CR3_STORE_EXITING);
2900 vcpu->arch.cr0 = cr0;
2901 vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
2902 }
2903
2904 if (!(cr0 & X86_CR0_WP))
2905 *hw_cr0 &= ~X86_CR0_WP;
2906 }
2907
2908 void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
2909 {
2910 struct vcpu_vmx *vmx = to_vmx(vcpu);
2911 unsigned long hw_cr0;
2912
2913 hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF);
2914 if (enable_unrestricted_guest)
2915 hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
2916 else {
2917 hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
2918
2919 if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
2920 enter_pmode(vcpu);
2921
2922 if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE))
2923 enter_rmode(vcpu);
2924 }
2925
2926 #ifdef CONFIG_X86_64
2927 if (vcpu->arch.efer & EFER_LME) {
2928 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG))
2929 enter_lmode(vcpu);
2930 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG))
2931 exit_lmode(vcpu);
2932 }
2933 #endif
2934
2935 if (enable_ept && !enable_unrestricted_guest)
2936 ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
2937
2938 vmcs_writel(CR0_READ_SHADOW, cr0);
2939 vmcs_writel(GUEST_CR0, hw_cr0);
2940 vcpu->arch.cr0 = cr0;
2941
2942
2943 vmx->emulation_required = emulation_required(vcpu);
2944 }
2945
2946 static int get_ept_level(struct kvm_vcpu *vcpu)
2947 {
2948
2949 if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu)))
2950 return 4;
2951 if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
2952 return 5;
2953 return 4;
2954 }
2955
2956 u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
2957 {
2958 u64 eptp = VMX_EPTP_MT_WB;
2959
2960 eptp |= (get_ept_level(vcpu) == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4;
2961
2962 if (enable_ept_ad_bits &&
2963 (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu)))
2964 eptp |= VMX_EPTP_AD_ENABLE_BIT;
2965 eptp |= (root_hpa & PAGE_MASK);
2966
2967 return eptp;
2968 }
2969
2970 void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
2971 {
2972 struct kvm *kvm = vcpu->kvm;
2973 bool update_guest_cr3 = true;
2974 unsigned long guest_cr3;
2975 u64 eptp;
2976
2977 guest_cr3 = cr3;
2978 if (enable_ept) {
2979 eptp = construct_eptp(vcpu, cr3);
2980 vmcs_write64(EPT_POINTER, eptp);
2981
2982 if (kvm_x86_ops->tlb_remote_flush) {
2983 spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
2984 to_vmx(vcpu)->ept_pointer = eptp;
2985 to_kvm_vmx(kvm)->ept_pointers_match
2986 = EPT_POINTERS_CHECK;
2987 spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
2988 }
2989
2990
2991 if (is_guest_mode(vcpu))
2992 update_guest_cr3 = false;
2993 else if (enable_unrestricted_guest || is_paging(vcpu))
2994 guest_cr3 = kvm_read_cr3(vcpu);
2995 else
2996 guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr;
2997 ept_load_pdptrs(vcpu);
2998 }
2999
3000 if (update_guest_cr3)
3001 vmcs_writel(GUEST_CR3, guest_cr3);
3002 }
3003
3004 int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
3005 {
3006 struct vcpu_vmx *vmx = to_vmx(vcpu);
3007
3008
3009
3010
3011
3012 unsigned long hw_cr4;
3013
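/* CR4.MCE always follows the host value; the guest never controls it. */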
3014 hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
3015 if (enable_unrestricted_guest)
3016 hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
3017 else if (vmx->rmode.vm86_active)
3018 hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
3019 else
3020 hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;
3021
3022 if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) {
3023 if (cr4 & X86_CR4_UMIP) {
3024 secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC);
3025 hw_cr4 &= ~X86_CR4_UMIP;
3026 } else if (!is_guest_mode(vcpu) ||
3027 !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) {
3028 secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_DESC);
3029 }
3030 }
3031
3032 if (cr4 & X86_CR4_VMXE) {
3033
3034
3035
3036
3037
3038
3039
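/* CR4.VMXE may only be set when nested VMX is allowed and the vCPU is not in SMM. */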
3040 if (!nested_vmx_allowed(vcpu) || is_smm(vcpu))
3041 return 1;
3042 }
3043
3044 if (vmx->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
3045 return 1;
3046
3047 vcpu->arch.cr4 = cr4;
3048
3049 if (!enable_unrestricted_guest) {
3050 if (enable_ept) {
3051 if (!is_paging(vcpu)) {
3052 hw_cr4 &= ~X86_CR4_PAE;
3053 hw_cr4 |= X86_CR4_PSE;
3054 } else if (!(cr4 & X86_CR4_PAE)) {
3055 hw_cr4 &= ~X86_CR4_PAE;
3056 }
3057 }
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
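/*
 * SMEP/SMAP/PKE behave as if disabled while the guest has paging off.  The
 * CPU still runs with paging enabled here (no unrestricted guest), so emulate
 * that by masking the bits out of the hardware CR4.
 */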
3070 if (!is_paging(vcpu))
3071 hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
3072 }
3073
3074 vmcs_writel(CR4_READ_SHADOW, cr4);
3075 vmcs_writel(GUEST_CR4, hw_cr4);
3076 return 0;
3077 }
3078
3079 void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
3080 {
3081 struct vcpu_vmx *vmx = to_vmx(vcpu);
3082 u32 ar;
3083
3084 if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
3085 *var = vmx->rmode.segs[seg];
3086 if (seg == VCPU_SREG_TR
3087 || var->selector == vmx_read_guest_seg_selector(vmx, seg))
3088 return;
3089 var->base = vmx_read_guest_seg_base(vmx, seg);
3090 var->selector = vmx_read_guest_seg_selector(vmx, seg);
3091 return;
3092 }
3093 var->base = vmx_read_guest_seg_base(vmx, seg);
3094 var->limit = vmx_read_guest_seg_limit(vmx, seg);
3095 var->selector = vmx_read_guest_seg_selector(vmx, seg);
3096 ar = vmx_read_guest_seg_ar(vmx, seg);
3097 var->unusable = (ar >> 16) & 1;
3098 var->type = ar & 15;
3099 var->s = (ar >> 4) & 1;
3100 var->dpl = (ar >> 5) & 3;
3101
3102
3103
3104
3105
3106
3107
3108 var->present = !var->unusable;
3109 var->avl = (ar >> 12) & 1;
3110 var->l = (ar >> 13) & 1;
3111 var->db = (ar >> 14) & 1;
3112 var->g = (ar >> 15) & 1;
3113 }
3114
3115 static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
3116 {
3117 struct kvm_segment s;
3118
3119 if (to_vmx(vcpu)->rmode.vm86_active) {
3120 vmx_get_segment(vcpu, &s, seg);
3121 return s.base;
3122 }
3123 return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
3124 }
3125
3126 int vmx_get_cpl(struct kvm_vcpu *vcpu)
3127 {
3128 struct vcpu_vmx *vmx = to_vmx(vcpu);
3129
3130 if (unlikely(vmx->rmode.vm86_active))
3131 return 0;
3132 else {
3133 int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
3134 return VMX_AR_DPL(ar);
3135 }
3136 }
3137
3138 static u32 vmx_segment_access_rights(struct kvm_segment *var)
3139 {
3140 u32 ar;
3141
3142 if (var->unusable || !var->present)
3143 ar = 1 << 16;
3144 else {
3145 ar = var->type & 15;
3146 ar |= (var->s & 1) << 4;
3147 ar |= (var->dpl & 3) << 5;
3148 ar |= (var->present & 1) << 7;
3149 ar |= (var->avl & 1) << 12;
3150 ar |= (var->l & 1) << 13;
3151 ar |= (var->db & 1) << 14;
3152 ar |= (var->g & 1) << 15;
3153 }
3154
3155 return ar;
3156 }
3157
3158 void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
3159 {
3160 struct vcpu_vmx *vmx = to_vmx(vcpu);
3161 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
3162
3163 vmx_segment_cache_clear(vmx);
3164
3165 if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
3166 vmx->rmode.segs[seg] = *var;
3167 if (seg == VCPU_SREG_TR)
3168 vmcs_write16(sf->selector, var->selector);
3169 else if (var->s)
3170 fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
3171 goto out;
3172 }
3173
3174 vmcs_writel(sf->base, var->base);
3175 vmcs_write32(sf->limit, var->limit);
3176 vmcs_write16(sf->selector, var->selector);
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
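/*
 * For unrestricted guests, force the segment type's "accessed" bit (except
 * for LDTR); some userspace loads segments with it clear, which hardware
 * VM-entry checks would otherwise reject.
 */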
3189 if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR))
3190 var->type |= 0x1;
3191
3192 vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
3193
3194 out:
3195 vmx->emulation_required = emulation_required(vcpu);
3196 }
3197
3198 static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
3199 {
3200 u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS);
3201
3202 *db = (ar >> 14) & 1;
3203 *l = (ar >> 13) & 1;
3204 }
3205
3206 static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
3207 {
3208 dt->size = vmcs_read32(GUEST_IDTR_LIMIT);
3209 dt->address = vmcs_readl(GUEST_IDTR_BASE);
3210 }
3211
3212 static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
3213 {
3214 vmcs_write32(GUEST_IDTR_LIMIT, dt->size);
3215 vmcs_writel(GUEST_IDTR_BASE, dt->address);
3216 }
3217
3218 static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
3219 {
3220 dt->size = vmcs_read32(GUEST_GDTR_LIMIT);
3221 dt->address = vmcs_readl(GUEST_GDTR_BASE);
3222 }
3223
3224 static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
3225 {
3226 vmcs_write32(GUEST_GDTR_LIMIT, dt->size);
3227 vmcs_writel(GUEST_GDTR_BASE, dt->address);
3228 }
3229
3230 static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
3231 {
3232 struct kvm_segment var;
3233 u32 ar;
3234
3235 vmx_get_segment(vcpu, &var, seg);
3236 var.dpl = 0x3;
3237 if (seg == VCPU_SREG_CS)
3238 var.type = 0x3;
3239 ar = vmx_segment_access_rights(&var);
3240
3241 if (var.base != (var.selector << 4))
3242 return false;
3243 if (var.limit != 0xffff)
3244 return false;
3245 if (ar != 0xf3)
3246 return false;
3247
3248 return true;
3249 }
3250
3251 static bool code_segment_valid(struct kvm_vcpu *vcpu)
3252 {
3253 struct kvm_segment cs;
3254 unsigned int cs_rpl;
3255
3256 vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
3257 cs_rpl = cs.selector & SEGMENT_RPL_MASK;
3258
3259 if (cs.unusable)
3260 return false;
3261 if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK))
3262 return false;
3263 if (!cs.s)
3264 return false;
3265 if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) {
3266 if (cs.dpl > cs_rpl)
3267 return false;
3268 } else {
3269 if (cs.dpl != cs_rpl)
3270 return false;
3271 }
3272 if (!cs.present)
3273 return false;
3274
3275
3276 return true;
3277 }
3278
3279 static bool stack_segment_valid(struct kvm_vcpu *vcpu)
3280 {
3281 struct kvm_segment ss;
3282 unsigned int ss_rpl;
3283
3284 vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
3285 ss_rpl = ss.selector & SEGMENT_RPL_MASK;
3286
3287 if (ss.unusable)
3288 return true;
3289 if (ss.type != 3 && ss.type != 7)
3290 return false;
3291 if (!ss.s)
3292 return false;
3293 if (ss.dpl != ss_rpl)
3294 return false;
3295 if (!ss.present)
3296 return false;
3297
3298 return true;
3299 }
3300
3301 static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
3302 {
3303 struct kvm_segment var;
3304 unsigned int rpl;
3305
3306 vmx_get_segment(vcpu, &var, seg);
3307 rpl = var.selector & SEGMENT_RPL_MASK;
3308
3309 if (var.unusable)
3310 return true;
3311 if (!var.s)
3312 return false;
3313 if (!var.present)
3314 return false;
3315 if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) {
3316 if (var.dpl < rpl)
3317 return false;
3318 }
3319
3320
3321
3322
3323 return true;
3324 }
3325
3326 static bool tr_valid(struct kvm_vcpu *vcpu)
3327 {
3328 struct kvm_segment tr;
3329
3330 vmx_get_segment(vcpu, &tr, VCPU_SREG_TR);
3331
3332 if (tr.unusable)
3333 return false;
3334 if (tr.selector & SEGMENT_TI_MASK)
3335 return false;
3336 if (tr.type != 3 && tr.type != 11)
3337 return false;
3338 if (!tr.present)
3339 return false;
3340
3341 return true;
3342 }
3343
3344 static bool ldtr_valid(struct kvm_vcpu *vcpu)
3345 {
3346 struct kvm_segment ldtr;
3347
3348 vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR);
3349
3350 if (ldtr.unusable)
3351 return true;
3352 if (ldtr.selector & SEGMENT_TI_MASK)
3353 return false;
3354 if (ldtr.type != 2)
3355 return false;
3356 if (!ldtr.present)
3357 return false;
3358
3359 return true;
3360 }
3361
3362 static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
3363 {
3364 struct kvm_segment cs, ss;
3365
3366 vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
3367 vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
3368
3369 return ((cs.selector & SEGMENT_RPL_MASK) ==
3370 (ss.selector & SEGMENT_RPL_MASK));
3371 }
3372
3373
3374
3375
3376
3377
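/*
 * Check whether the current guest segment/flag state can be run directly by
 * VMX; when it cannot, KVM falls back to emulation (see emulation_required()).
 */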
3378 static bool guest_state_valid(struct kvm_vcpu *vcpu)
3379 {
3380 if (enable_unrestricted_guest)
3381 return true;
3382
3383
3384 if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
3385 if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
3386 return false;
3387 if (!rmode_segment_valid(vcpu, VCPU_SREG_SS))
3388 return false;
3389 if (!rmode_segment_valid(vcpu, VCPU_SREG_DS))
3390 return false;
3391 if (!rmode_segment_valid(vcpu, VCPU_SREG_ES))
3392 return false;
3393 if (!rmode_segment_valid(vcpu, VCPU_SREG_FS))
3394 return false;
3395 if (!rmode_segment_valid(vcpu, VCPU_SREG_GS))
3396 return false;
3397 } else {
3398
3399 if (!cs_ss_rpl_check(vcpu))
3400 return false;
3401 if (!code_segment_valid(vcpu))
3402 return false;
3403 if (!stack_segment_valid(vcpu))
3404 return false;
3405 if (!data_segment_valid(vcpu, VCPU_SREG_DS))
3406 return false;
3407 if (!data_segment_valid(vcpu, VCPU_SREG_ES))
3408 return false;
3409 if (!data_segment_valid(vcpu, VCPU_SREG_FS))
3410 return false;
3411 if (!data_segment_valid(vcpu, VCPU_SREG_GS))
3412 return false;
3413 if (!tr_valid(vcpu))
3414 return false;
3415 if (!ldtr_valid(vcpu))
3416 return false;
3417 }
3418
3419
3420
3421
3422
3423 return true;
3424 }
3425
3426 static int init_rmode_tss(struct kvm *kvm)
3427 {
3428 gfn_t fn;
3429 u16 data = 0;
3430 int idx, r;
3431
3432 idx = srcu_read_lock(&kvm->srcu);
3433 fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT;
3434 r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
3435 if (r < 0)
3436 goto out;
3437 data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
3438 r = kvm_write_guest_page(kvm, fn++, &data,
3439 TSS_IOPB_BASE_OFFSET, sizeof(u16));
3440 if (r < 0)
3441 goto out;
3442 r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE);
3443 if (r < 0)
3444 goto out;
3445 r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
3446 if (r < 0)
3447 goto out;
3448 data = ~0;
3449 r = kvm_write_guest_page(kvm, fn, &data,
3450 RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1,
3451 sizeof(u8));
3452 out:
3453 srcu_read_unlock(&kvm->srcu, idx);
3454 return r;
3455 }
3456
3457 static int init_rmode_identity_map(struct kvm *kvm)
3458 {
3459 struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
3460 int i, idx, r = 0;
3461 kvm_pfn_t identity_map_pfn;
3462 u32 tmp;
3463
3464
3465 mutex_lock(&kvm->slots_lock);
3466
3467 if (likely(kvm_vmx->ept_identity_pagetable_done))
3468 goto out2;
3469
3470 if (!kvm_vmx->ept_identity_map_addr)
3471 kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
3472 identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT;
3473
3474 r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
3475 kvm_vmx->ept_identity_map_addr, PAGE_SIZE);
3476 if (r < 0)
3477 goto out2;
3478
3479 idx = srcu_read_lock(&kvm->srcu);
3480 r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
3481 if (r < 0)
3482 goto out;
3483
3484 for (i = 0; i < PT32_ENT_PER_PAGE; i++) {
3485 tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
3486 _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
3487 r = kvm_write_guest_page(kvm, identity_map_pfn,
3488 &tmp, i * sizeof(tmp), sizeof(tmp));
3489 if (r < 0)
3490 goto out;
3491 }
3492 kvm_vmx->ept_identity_pagetable_done = true;
3493
3494 out:
3495 srcu_read_unlock(&kvm->srcu, idx);
3496
3497 out2:
3498 mutex_unlock(&kvm->slots_lock);
3499 return r;
3500 }
3501
3502 static void seg_setup(int seg)
3503 {
3504 const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
3505 unsigned int ar;
3506
3507 vmcs_write16(sf->selector, 0);
3508 vmcs_writel(sf->base, 0);
3509 vmcs_write32(sf->limit, 0xffff);
3510 ar = 0x93;
3511 if (seg == VCPU_SREG_CS)
3512 ar |= 0x08;
3513
3514 vmcs_write32(sf->ar_bytes, ar);
3515 }
3516
3517 static int alloc_apic_access_page(struct kvm *kvm)
3518 {
3519 struct page *page;
3520 int r = 0;
3521
3522 mutex_lock(&kvm->slots_lock);
3523 if (kvm->arch.apic_access_page_done)
3524 goto out;
3525 r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
3526 APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
3527 if (r)
3528 goto out;
3529
3530 page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
3531 if (is_error_page(page)) {
3532 r = -EFAULT;
3533 goto out;
3534 }
3535
3536
3537
3538
3539
3540 put_page(page);
3541 kvm->arch.apic_access_page_done = true;
3542 out:
3543 mutex_unlock(&kvm->slots_lock);
3544 return r;
3545 }
3546
3547 int allocate_vpid(void)
3548 {
3549 int vpid;
3550
3551 if (!enable_vpid)
3552 return 0;
3553 spin_lock(&vmx_vpid_lock);
3554 vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
3555 if (vpid < VMX_NR_VPIDS)
3556 __set_bit(vpid, vmx_vpid_bitmap);
3557 else
3558 vpid = 0;
3559 spin_unlock(&vmx_vpid_lock);
3560 return vpid;
3561 }
3562
3563 void free_vpid(int vpid)
3564 {
3565 if (!enable_vpid || vpid == 0)
3566 return;
3567 spin_lock(&vmx_vpid_lock);
3568 __clear_bit(vpid, vmx_vpid_bitmap);
3569 spin_unlock(&vmx_vpid_lock);
3570 }
3571
3572 static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
3573 u32 msr, int type)
3574 {
3575 int f = sizeof(unsigned long);
3576
3577 if (!cpu_has_vmx_msr_bitmap())
3578 return;
3579
3580 if (static_branch_unlikely(&enable_evmcs))
3581 evmcs_touch_msr_bitmap();
3582
3583
3584
3585
3586
3587
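/*
 * MSR bitmap layout: 0x000 read-low, 0x400 read-high, 0x800 write-low,
 * 0xc00 write-high; "low" covers MSRs 0x0-0x1fff, "high" covers
 * 0xc0000000-0xc0001fff.
 */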
3588 if (msr <= 0x1fff) {
3589 if (type & MSR_TYPE_R)
3590
3591 __clear_bit(msr, msr_bitmap + 0x000 / f);
3592
3593 if (type & MSR_TYPE_W)
3594
3595 __clear_bit(msr, msr_bitmap + 0x800 / f);
3596
3597 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
3598 msr &= 0x1fff;
3599 if (type & MSR_TYPE_R)
3600
3601 __clear_bit(msr, msr_bitmap + 0x400 / f);
3602
3603 if (type & MSR_TYPE_W)
3604
3605 __clear_bit(msr, msr_bitmap + 0xc00 / f);
3606
3607 }
3608 }
3609
3610 static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
3611 u32 msr, int type)
3612 {
3613 int f = sizeof(unsigned long);
3614
3615 if (!cpu_has_vmx_msr_bitmap())
3616 return;
3617
3618 if (static_branch_unlikely(&enable_evmcs))
3619 evmcs_touch_msr_bitmap();
3620
3621
3622
3623
3624
3625
3626 if (msr <= 0x1fff) {
3627 if (type & MSR_TYPE_R)
3628
3629 __set_bit(msr, msr_bitmap + 0x000 / f);
3630
3631 if (type & MSR_TYPE_W)
3632
3633 __set_bit(msr, msr_bitmap + 0x800 / f);
3634
3635 } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
3636 msr &= 0x1fff;
3637 if (type & MSR_TYPE_R)
3638
3639 __set_bit(msr, msr_bitmap + 0x400 / f);
3640
3641 if (type & MSR_TYPE_W)
3642
3643 __set_bit(msr, msr_bitmap + 0xc00 / f);
3644
3645 }
3646 }
3647
3648 static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
3649 u32 msr, int type, bool value)
3650 {
3651 if (value)
3652 vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
3653 else
3654 vmx_disable_intercept_for_msr(msr_bitmap, msr, type);
3655 }
3656
3657 static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
3658 {
3659 u8 mode = 0;
3660
3661 if (cpu_has_secondary_exec_ctrls() &&
3662 (secondary_exec_controls_get(to_vmx(vcpu)) &
3663 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
3664 mode |= MSR_BITMAP_MODE_X2APIC;
3665 if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
3666 mode |= MSR_BITMAP_MODE_X2APIC_APICV;
3667 }
3668
3669 return mode;
3670 }
3671
3672 static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap,
3673 u8 mode)
3674 {
3675 int msr;
3676
3677 for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
3678 unsigned word = msr / BITS_PER_LONG;
3679 msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
3680 msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
3681 }
3682
3683 if (mode & MSR_BITMAP_MODE_X2APIC) {
3684
3685
3686
3687
3688 vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW);
3689 if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
3690 vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
3691 vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
3692 vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
3693 }
3694 }
3695 }
3696
3697 void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
3698 {
3699 struct vcpu_vmx *vmx = to_vmx(vcpu);
3700 unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
3701 u8 mode = vmx_msr_bitmap_mode(vcpu);
3702 u8 changed = mode ^ vmx->msr_bitmap_mode;
3703
3704 if (!changed)
3705 return;
3706
3707 if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
3708 vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
3709
3710 vmx->msr_bitmap_mode = mode;
3711 }
3712
3713 void pt_update_intercept_for_msr(struct vcpu_vmx *vmx)
3714 {
3715 unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
3716 bool flag = !(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN);
3717 u32 i;
3718
3719 vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_STATUS,
3720 MSR_TYPE_RW, flag);
3721 vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_OUTPUT_BASE,
3722 MSR_TYPE_RW, flag);
3723 vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_OUTPUT_MASK,
3724 MSR_TYPE_RW, flag);
3725 vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_CR3_MATCH,
3726 MSR_TYPE_RW, flag);
3727 for (i = 0; i < vmx->pt_desc.addr_range; i++) {
3728 vmx_set_intercept_for_msr(msr_bitmap,
3729 MSR_IA32_RTIT_ADDR0_A + i * 2, MSR_TYPE_RW, flag);
3730 vmx_set_intercept_for_msr(msr_bitmap,
3731 MSR_IA32_RTIT_ADDR0_B + i * 2, MSR_TYPE_RW, flag);
3732 }
3733 }
3734
3735 static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu)
3736 {
3737 return enable_apicv;
3738 }
3739
3740 static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
3741 {
3742 struct vcpu_vmx *vmx = to_vmx(vcpu);
3743 void *vapic_page;
3744 u32 vppr;
3745 int rvi;
3746
3747 if (WARN_ON_ONCE(!is_guest_mode(vcpu)) ||
3748 !nested_cpu_has_vid(get_vmcs12(vcpu)) ||
3749 WARN_ON_ONCE(!vmx->nested.virtual_apic_map.gfn))
3750 return false;
3751
3752 rvi = vmx_get_rvi();
3753
3754 vapic_page = vmx->nested.virtual_apic_map.hva;
3755 vppr = *((u32 *)(vapic_page + APIC_PROCPRI));
3756
3757 return ((rvi & 0xf0) > (vppr & 0xf0));
3758 }
3759
3760 static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
3761 bool nested)
3762 {
3763 #ifdef CONFIG_SMP
3764 int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR;
3765
3766 if (vcpu->mode == IN_GUEST_MODE) {
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
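/*
 * The target vCPU is running in guest mode, so send the posted-interrupt
 * notification vector as an IPI; the CPU then processes the pending PIR bits
 * without a VM exit.
 */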
3792 apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
3793 return true;
3794 }
3795 #endif
3796 return false;
3797 }
3798
3799 static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
3800 int vector)
3801 {
3802 struct vcpu_vmx *vmx = to_vmx(vcpu);
3803
3804 if (is_guest_mode(vcpu) &&
3805 vector == vmx->nested.posted_intr_nv) {
3806
3807
3808
3809
3810 vmx->nested.pi_pending = true;
3811 kvm_make_request(KVM_REQ_EVENT, vcpu);
3812
3813 if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true))
3814 kvm_vcpu_kick(vcpu);
3815 return 0;
3816 }
3817 return -1;
3818 }
3819
3820
3821
3822
3823
3824
3825
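/*
 * Deliver an interrupt via posted interrupts: record the vector in the PIR,
 * set the outstanding-notification bit and notify (or kick) the target vCPU.
 */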
3826 static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
3827 {
3828 struct vcpu_vmx *vmx = to_vmx(vcpu);
3829 int r;
3830
3831 r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
3832 if (!r)
3833 return 0;
3834
3835 if (!vcpu->arch.apicv_active)
3836 return -1;
3837
3838 if (pi_test_and_set_pir(vector, &vmx->pi_desc))
3839 return 0;
3840
3841
3842 if (pi_test_and_set_on(&vmx->pi_desc))
3843 return 0;
3844
3845 if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
3846 kvm_vcpu_kick(vcpu);
3847
3848 return 0;
3849 }
3850
3851
3852
3853
3854
3855
3856
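/*
 * Program the host-state fields that stay constant for the lifetime of the
 * VM; per-CPU host state is written elsewhere (e.g. when the vCPU is loaded).
 */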
3857 void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
3858 {
3859 u32 low32, high32;
3860 unsigned long tmpl;
3861 unsigned long cr0, cr3, cr4;
3862
3863 cr0 = read_cr0();
3864 WARN_ON(cr0 & X86_CR0_TS);
3865 vmcs_writel(HOST_CR0, cr0);
3866
3867
3868
3869
3870
3871 cr3 = __read_cr3();
3872 vmcs_writel(HOST_CR3, cr3);
3873 vmx->loaded_vmcs->host_state.cr3 = cr3;
3874
3875
3876 cr4 = cr4_read_shadow();
3877 vmcs_writel(HOST_CR4, cr4);
3878 vmx->loaded_vmcs->host_state.cr4 = cr4;
3879
3880 vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);
3881 #ifdef CONFIG_X86_64
3882
3883
3884
3885
3886
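/*
 * Use null DS/ES selectors on 64-bit hosts so they normally do not have to be
 * reloaded on the switch back to the host.
 */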
3887 vmcs_write16(HOST_DS_SELECTOR, 0);
3888 vmcs_write16(HOST_ES_SELECTOR, 0);
3889 #else
3890 vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS);
3891 vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS);
3892 #endif
3893 vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS);
3894 vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8);
3895
3896 vmcs_writel(HOST_IDTR_BASE, host_idt_base);
3897
3898 vmcs_writel(HOST_RIP, (unsigned long)vmx_vmexit);
3899
3900 rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
3901 vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
3902 rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl);
3903 vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl);
3904
3905 if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
3906 rdmsr(MSR_IA32_CR_PAT, low32, high32);
3907 vmcs_write64(HOST_IA32_PAT, low32 | ((u64) high32 << 32));
3908 }
3909
3910 if (cpu_has_load_ia32_efer())
3911 vmcs_write64(HOST_IA32_EFER, host_efer);
3912 }
3913
3914 void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
3915 {
3916 vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
3917 if (enable_ept)
3918 vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
3919 if (is_guest_mode(&vmx->vcpu))
3920 vmx->vcpu.arch.cr4_guest_owned_bits &=
3921 ~get_vmcs12(&vmx->vcpu)->cr4_guest_host_mask;
3922 vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
3923 }
3924
3925 u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
3926 {
3927 u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
3928
3929 if (!kvm_vcpu_apicv_active(&vmx->vcpu))
3930 pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
3931
3932 if (!enable_vnmi)
3933 pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;
3934
3935 if (!enable_preemption_timer)
3936 pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
3937
3938 return pin_based_exec_ctrl;
3939 }
3940
3941 static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
3942 {
3943 struct vcpu_vmx *vmx = to_vmx(vcpu);
3944
3945 pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
3946 if (cpu_has_secondary_exec_ctrls()) {
3947 if (kvm_vcpu_apicv_active(vcpu))
3948 secondary_exec_controls_setbit(vmx,
3949 SECONDARY_EXEC_APIC_REGISTER_VIRT |
3950 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
3951 else
3952 secondary_exec_controls_clearbit(vmx,
3953 SECONDARY_EXEC_APIC_REGISTER_VIRT |
3954 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
3955 }
3956
3957 if (cpu_has_vmx_msr_bitmap())
3958 vmx_update_msr_bitmap(vcpu);
3959 }
3960
3961 u32 vmx_exec_control(struct vcpu_vmx *vmx)
3962 {
3963 u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
3964
3965 if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)
3966 exec_control &= ~CPU_BASED_MOV_DR_EXITING;
3967
3968 if (!cpu_need_tpr_shadow(&vmx->vcpu)) {
3969 exec_control &= ~CPU_BASED_TPR_SHADOW;
3970 #ifdef CONFIG_X86_64
3971 exec_control |= CPU_BASED_CR8_STORE_EXITING |
3972 CPU_BASED_CR8_LOAD_EXITING;
3973 #endif
3974 }
3975 if (!enable_ept)
3976 exec_control |= CPU_BASED_CR3_STORE_EXITING |
3977 CPU_BASED_CR3_LOAD_EXITING |
3978 CPU_BASED_INVLPG_EXITING;
3979 if (kvm_mwait_in_guest(vmx->vcpu.kvm))
3980 exec_control &= ~(CPU_BASED_MWAIT_EXITING |
3981 CPU_BASED_MONITOR_EXITING);
3982 if (kvm_hlt_in_guest(vmx->vcpu.kvm))
3983 exec_control &= ~CPU_BASED_HLT_EXITING;
3984 return exec_control;
3985 }
3986
3987
3988 static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
3989 {
3990 struct kvm_vcpu *vcpu = &vmx->vcpu;
3991
3992 u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
3993
3994 if (pt_mode == PT_MODE_SYSTEM)
3995 exec_control &= ~(SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX);
3996 if (!cpu_need_virtualize_apic_accesses(vcpu))
3997 exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
3998 if (vmx->vpid == 0)
3999 exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
4000 if (!enable_ept) {
4001 exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
4002 enable_unrestricted_guest = 0;
4003 }
4004 if (!enable_unrestricted_guest)
4005 exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
4006 if (kvm_pause_in_guest(vmx->vcpu.kvm))
4007 exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
4008 if (!kvm_vcpu_apicv_active(vcpu))
4009 exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
4010 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
4011 exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
4012
4013
4014
4015 exec_control &= ~SECONDARY_EXEC_DESC;
4016
4017
4018
4019
4020
4021
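/*
 * Shadow VMCS is only enabled later, once a nested guest actually loads a
 * VMCS; keep it off in the base configuration.
 */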
4022 exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
4023
4024 if (!enable_pml)
4025 exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
4026
4027 if (vmx_xsaves_supported()) {
4028
4029 bool xsaves_enabled =
4030 guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
4031 guest_cpuid_has(vcpu, X86_FEATURE_XSAVES);
4032
4033 if (!xsaves_enabled)
4034 exec_control &= ~SECONDARY_EXEC_XSAVES;
4035
4036 if (nested) {
4037 if (xsaves_enabled)
4038 vmx->nested.msrs.secondary_ctls_high |=
4039 SECONDARY_EXEC_XSAVES;
4040 else
4041 vmx->nested.msrs.secondary_ctls_high &=
4042 ~SECONDARY_EXEC_XSAVES;
4043 }
4044 }
4045
4046 if (vmx_rdtscp_supported()) {
4047 bool rdtscp_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP);
4048 if (!rdtscp_enabled)
4049 exec_control &= ~SECONDARY_EXEC_RDTSCP;
4050
4051 if (nested) {
4052 if (rdtscp_enabled)
4053 vmx->nested.msrs.secondary_ctls_high |=
4054 SECONDARY_EXEC_RDTSCP;
4055 else
4056 vmx->nested.msrs.secondary_ctls_high &=
4057 ~SECONDARY_EXEC_RDTSCP;
4058 }
4059 }
4060
4061 if (vmx_invpcid_supported()) {
4062
4063 bool invpcid_enabled =
4064 guest_cpuid_has(vcpu, X86_FEATURE_INVPCID) &&
4065 guest_cpuid_has(vcpu, X86_FEATURE_PCID);
4066
4067 if (!invpcid_enabled) {
4068 exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
4069 guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID);
4070 }
4071
4072 if (nested) {
4073 if (invpcid_enabled)
4074 vmx->nested.msrs.secondary_ctls_high |=
4075 SECONDARY_EXEC_ENABLE_INVPCID;
4076 else
4077 vmx->nested.msrs.secondary_ctls_high &=
4078 ~SECONDARY_EXEC_ENABLE_INVPCID;
4079 }
4080 }
4081
4082 if (vmx_rdrand_supported()) {
4083 bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND);
4084 if (rdrand_enabled)
4085 exec_control &= ~SECONDARY_EXEC_RDRAND_EXITING;
4086
4087 if (nested) {
4088 if (rdrand_enabled)
4089 vmx->nested.msrs.secondary_ctls_high |=
4090 SECONDARY_EXEC_RDRAND_EXITING;
4091 else
4092 vmx->nested.msrs.secondary_ctls_high &=
4093 ~SECONDARY_EXEC_RDRAND_EXITING;
4094 }
4095 }
4096
4097 if (vmx_rdseed_supported()) {
4098 bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED);
4099 if (rdseed_enabled)
4100 exec_control &= ~SECONDARY_EXEC_RDSEED_EXITING;
4101
4102 if (nested) {
4103 if (rdseed_enabled)
4104 vmx->nested.msrs.secondary_ctls_high |=
4105 SECONDARY_EXEC_RDSEED_EXITING;
4106 else
4107 vmx->nested.msrs.secondary_ctls_high &=
4108 ~SECONDARY_EXEC_RDSEED_EXITING;
4109 }
4110 }
4111
4112 if (vmx_waitpkg_supported()) {
4113 bool waitpkg_enabled =
4114 guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG);
4115
4116 if (!waitpkg_enabled)
4117 exec_control &= ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
4118
4119 if (nested) {
4120 if (waitpkg_enabled)
4121 vmx->nested.msrs.secondary_ctls_high |=
4122 SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
4123 else
4124 vmx->nested.msrs.secondary_ctls_high &=
4125 ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
4126 }
4127 }
4128
4129 vmx->secondary_exec_control = exec_control;
4130 }
4131
4132 static void ept_set_mmio_spte_mask(void)
4133 {
4134 /*
4135  * EPT misconfigurations can be generated if the value of bits 2:0
4136  * of an EPT paging-structure entry is 110b (write/execute).
4137  */
4138 kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK,
4139 VMX_EPT_MISCONFIG_WX_VALUE, 0);
4140 }
4141
4142 #define VMX_XSS_EXIT_BITMAP 0
4143
4144 /*
4145  * Sets up the vmcs for emulated real mode.
4146  */
4147 static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
4148 {
4149 int i;
4150
4151 if (nested)
4152 nested_vmx_vcpu_setup();
4153
4154 if (cpu_has_vmx_msr_bitmap())
4155 vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
4156
4157 vmcs_write64(VMCS_LINK_POINTER, -1ull);
4158
4159
4160 pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
4161 vmx->hv_deadline_tsc = -1;
4162
4163 exec_controls_set(vmx, vmx_exec_control(vmx));
4164
4165 if (cpu_has_secondary_exec_ctrls()) {
4166 vmx_compute_secondary_exec_control(vmx);
4167 secondary_exec_controls_set(vmx, vmx->secondary_exec_control);
4168 }
4169
4170 if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
4171 vmcs_write64(EOI_EXIT_BITMAP0, 0);
4172 vmcs_write64(EOI_EXIT_BITMAP1, 0);
4173 vmcs_write64(EOI_EXIT_BITMAP2, 0);
4174 vmcs_write64(EOI_EXIT_BITMAP3, 0);
4175
4176 vmcs_write16(GUEST_INTR_STATUS, 0);
4177
4178 vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
4179 vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
4180 }
4181
4182 if (!kvm_pause_in_guest(vmx->vcpu.kvm)) {
4183 vmcs_write32(PLE_GAP, ple_gap);
4184 vmx->ple_window = ple_window;
4185 vmx->ple_window_dirty = true;
4186 }
4187
4188 vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
4189 vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
4190 vmcs_write32(CR3_TARGET_COUNT, 0);
4191
4192 vmcs_write16(HOST_FS_SELECTOR, 0);
4193 vmcs_write16(HOST_GS_SELECTOR, 0);
4194 vmx_set_constant_host_state(vmx);
4195 vmcs_writel(HOST_FS_BASE, 0);
4196 vmcs_writel(HOST_GS_BASE, 0);
4197
4198 if (cpu_has_vmx_vmfunc())
4199 vmcs_write64(VM_FUNCTION_CONTROL, 0);
4200
4201 vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
4202 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
4203 vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
4204 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
4205 vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
4206
4207 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
4208 vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
4209
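/*
 * Probe the optional MSRs in vmx_msr_index[] and register only the ones
 * the host actually supports (both rdmsr and wrmsr must succeed).
 */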
4210 for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
4211 u32 index = vmx_msr_index[i];
4212 u32 data_low, data_high;
4213 int j = vmx->nmsrs;
4214
4215 if (rdmsr_safe(index, &data_low, &data_high) < 0)
4216 continue;
4217 if (wrmsr_safe(index, data_low, data_high) < 0)
4218 continue;
4219 vmx->guest_msrs[j].index = i;
4220 vmx->guest_msrs[j].data = 0;
4221 vmx->guest_msrs[j].mask = -1ull;
4222 ++vmx->nmsrs;
4223 }
4224
4225 vm_exit_controls_set(vmx, vmx_vmexit_ctrl());
4226
4227
4228 vm_entry_controls_set(vmx, vmx_vmentry_ctrl());
4229
4230 vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS;
4231 vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS);
4232
4233 set_cr4_guest_host_mask(vmx);
4234
4235 if (vmx_xsaves_supported())
4236 vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
4237
4238 if (enable_pml) {
4239 vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
4240 vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
4241 }
4242
4243 if (cpu_has_vmx_encls_vmexit())
4244 vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
4245
4246 if (pt_mode == PT_MODE_HOST_GUEST) {
4247 memset(&vmx->pt_desc, 0, sizeof(vmx->pt_desc));
4248
4249 vmx->pt_desc.guest.output_mask = 0x7F;
4250 vmcs_write64(GUEST_IA32_RTIT_CTL, 0);
4251 }
4252 }
4253
4254 static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
4255 {
4256 struct vcpu_vmx *vmx = to_vmx(vcpu);
4257 struct msr_data apic_base_msr;
4258 u64 cr0;
4259
4260 vmx->rmode.vm86_active = 0;
4261 vmx->spec_ctrl = 0;
4262
4263 vmx->msr_ia32_umwait_control = 0;
4264
4265 vcpu->arch.microcode_version = 0x100000000ULL;
4266 vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
4267 vmx->hv_deadline_tsc = -1;
4268 kvm_set_cr8(vcpu, 0);
4269
4270 if (!init_event) {
4271 apic_base_msr.data = APIC_DEFAULT_PHYS_BASE |
4272 MSR_IA32_APICBASE_ENABLE;
4273 if (kvm_vcpu_is_reset_bsp(vcpu))
4274 apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
4275 apic_base_msr.host_initiated = true;
4276 kvm_set_apic_base(vcpu, &apic_base_msr);
4277 }
4278
4279 vmx_segment_cache_clear(vmx);
4280
4281 seg_setup(VCPU_SREG_CS);
4282 vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
4283 vmcs_writel(GUEST_CS_BASE, 0xffff0000ul);
4284
4285 seg_setup(VCPU_SREG_DS);
4286 seg_setup(VCPU_SREG_ES);
4287 seg_setup(VCPU_SREG_FS);
4288 seg_setup(VCPU_SREG_GS);
4289 seg_setup(VCPU_SREG_SS);
4290
4291 vmcs_write16(GUEST_TR_SELECTOR, 0);
4292 vmcs_writel(GUEST_TR_BASE, 0);
4293 vmcs_write32(GUEST_TR_LIMIT, 0xffff);
4294 vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
4295
4296 vmcs_write16(GUEST_LDTR_SELECTOR, 0);
4297 vmcs_writel(GUEST_LDTR_BASE, 0);
4298 vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
4299 vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
4300
4301 if (!init_event) {
4302 vmcs_write32(GUEST_SYSENTER_CS, 0);
4303 vmcs_writel(GUEST_SYSENTER_ESP, 0);
4304 vmcs_writel(GUEST_SYSENTER_EIP, 0);
4305 vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
4306 }
4307
4308 kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
4309 kvm_rip_write(vcpu, 0xfff0);
4310
4311 vmcs_writel(GUEST_GDTR_BASE, 0);
4312 vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
4313
4314 vmcs_writel(GUEST_IDTR_BASE, 0);
4315 vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
4316
4317 vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
4318 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
4319 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);
4320 if (kvm_mpx_supported())
4321 vmcs_write64(GUEST_BNDCFGS, 0);
4322
4323 setup_msrs(vmx);
4324
4325 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
4326
4327 if (cpu_has_vmx_tpr_shadow() && !init_event) {
4328 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
4329 if (cpu_need_tpr_shadow(vcpu))
4330 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
4331 __pa(vcpu->arch.apic->regs));
4332 vmcs_write32(TPR_THRESHOLD, 0);
4333 }
4334
4335 kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
4336
4337 if (vmx->vpid != 0)
4338 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
4339
4340 cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
4341 vmx->vcpu.arch.cr0 = cr0;
4342 vmx_set_cr0(vcpu, cr0);
4343 vmx_set_cr4(vcpu, 0);
4344 vmx_set_efer(vcpu, 0);
4345
4346 update_exception_bitmap(vcpu);
4347
4348 vpid_sync_context(vmx->vpid);
4349 if (init_event)
4350 vmx_clear_hlt(vcpu);
4351 }
4352
4353 static void enable_irq_window(struct kvm_vcpu *vcpu)
4354 {
4355 exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING);
4356 }
4357
4358 static void enable_nmi_window(struct kvm_vcpu *vcpu)
4359 {
4360 if (!enable_vnmi ||
4361 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
4362 enable_irq_window(vcpu);
4363 return;
4364 }
4365
4366 exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING);
4367 }
4368
4369 static void vmx_inject_irq(struct kvm_vcpu *vcpu)
4370 {
4371 struct vcpu_vmx *vmx = to_vmx(vcpu);
4372 uint32_t intr;
4373 int irq = vcpu->arch.interrupt.nr;
4374
4375 trace_kvm_inj_virq(irq);
4376
4377 ++vcpu->stat.irq_injections;
4378 if (vmx->rmode.vm86_active) {
4379 int inc_eip = 0;
4380 if (vcpu->arch.interrupt.soft)
4381 inc_eip = vcpu->arch.event_exit_inst_len;
4382 kvm_inject_realmode_interrupt(vcpu, irq, inc_eip);
4383 return;
4384 }
4385 intr = irq | INTR_INFO_VALID_MASK;
4386 if (vcpu->arch.interrupt.soft) {
4387 intr |= INTR_TYPE_SOFT_INTR;
4388 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
4389 vmx->vcpu.arch.event_exit_inst_len);
4390 } else
4391 intr |= INTR_TYPE_EXT_INTR;
4392 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
4393
4394 vmx_clear_hlt(vcpu);
4395 }
4396
4397 static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
4398 {
4399 struct vcpu_vmx *vmx = to_vmx(vcpu);
4400
4401 if (!enable_vnmi) {
4402 /*
4403  * Tracking the NMI-blocked state in software is built upon
4404  * finding the next open IRQ window. This, in turn, depends on
4405  * well-behaving guests: they have to keep IRQs disabled at
4406  * least as long as the NMI handler runs. Otherwise we may
4407  * cause NMI nesting, maybe breaking the guest. But as this is
4408  * highly unlikely, we can live with the residual risk.
4409  */
4410 vmx->loaded_vmcs->soft_vnmi_blocked = 1;
4411 vmx->loaded_vmcs->vnmi_blocked_time = 0;
4412 }
4413
4414 ++vcpu->stat.nmi_injections;
4415 vmx->loaded_vmcs->nmi_known_unmasked = false;
4416
4417 if (vmx->rmode.vm86_active) {
4418 kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0);
4419 return;
4420 }
4421
4422 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
4423 INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
4424
4425 vmx_clear_hlt(vcpu);
4426 }
4427
4428 bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
4429 {
4430 struct vcpu_vmx *vmx = to_vmx(vcpu);
4431 bool masked;
4432
4433 if (!enable_vnmi)
4434 return vmx->loaded_vmcs->soft_vnmi_blocked;
4435 if (vmx->loaded_vmcs->nmi_known_unmasked)
4436 return false;
4437 masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
4438 vmx->loaded_vmcs->nmi_known_unmasked = !masked;
4439 return masked;
4440 }
4441
4442 void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
4443 {
4444 struct vcpu_vmx *vmx = to_vmx(vcpu);
4445
4446 if (!enable_vnmi) {
4447 if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
4448 vmx->loaded_vmcs->soft_vnmi_blocked = masked;
4449 vmx->loaded_vmcs->vnmi_blocked_time = 0;
4450 }
4451 } else {
4452 vmx->loaded_vmcs->nmi_known_unmasked = !masked;
4453 if (masked)
4454 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
4455 GUEST_INTR_STATE_NMI);
4456 else
4457 vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
4458 GUEST_INTR_STATE_NMI);
4459 }
4460 }
4461
4462 static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
4463 {
4464 if (to_vmx(vcpu)->nested.nested_run_pending)
4465 return 0;
4466
4467 if (!enable_vnmi &&
4468 to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
4469 return 0;
4470
4471 return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
4472 (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
4473 | GUEST_INTR_STATE_NMI));
4474 }
4475
4476 static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
4477 {
4478 if (to_vmx(vcpu)->nested.nested_run_pending)
4479 return false;
4480
4481 if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
4482 return true;
4483
4484 return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
4485 !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
4486 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
4487 }
4488
4489 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
4490 {
4491 int ret;
4492
4493 if (enable_unrestricted_guest)
4494 return 0;
4495
4496 ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
4497 PAGE_SIZE * 3);
4498 if (ret)
4499 return ret;
4500 to_kvm_vmx(kvm)->tss_addr = addr;
4501 return init_rmode_tss(kvm);
4502 }
4503
4504 static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
4505 {
4506 to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr;
4507 return 0;
4508 }
4509
4510 static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
4511 {
4512 switch (vec) {
4513 case BP_VECTOR:
4514 /*
4515  * Update instruction length as we may reinject the exception
4516  * from user space while in guest debugging mode.
4517  */
4518 to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
4519 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
4520 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
4521 return false;
4522 /* fall through */
4523 case DB_VECTOR:
4524 if (vcpu->guest_debug &
4525 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
4526 return false;
4527 /* fall through */
4528 case DE_VECTOR:
4529 case OF_VECTOR:
4530 case BR_VECTOR:
4531 case UD_VECTOR:
4532 case DF_VECTOR:
4533 case SS_VECTOR:
4534 case GP_VECTOR:
4535 case MF_VECTOR:
4536 return true;
4537 break;
4538 }
4539 return false;
4540 }
4541
4542 static int handle_rmode_exception(struct kvm_vcpu *vcpu,
4543 int vec, u32 err_code)
4544 {
4545 /*
4546  * An instruction with the address-size override prefix (opcode 0x67)
4547  * causes a #SS fault with error code 0 in VM86 mode.
4548  */
4549 if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
4550 if (kvm_emulate_instruction(vcpu, 0)) {
4551 if (vcpu->arch.halt_request) {
4552 vcpu->arch.halt_request = 0;
4553 return kvm_vcpu_halt(vcpu);
4554 }
4555 return 1;
4556 }
4557 return 0;
4558 }
4559
4560 /*
4561  * Forward all other exceptions that are valid in real mode.
4562  * FIXME: breaks guest debugging in real mode, needs to be fixed
4563  *        with the required debugging infrastructure rework.
4564  */
4565 kvm_queue_exception(vcpu, vec);
4566 return 1;
4567 }
4568
4569
4570 /*
4571  * Trigger machine check on the host. We assume all the MSRs are
4572  * already set up by the CPU and that we still run on the same CPU as
4573  * the MCE occurred on. We pass a fake environment to the machine check
4574  * handler so the guest is always treated like user space.
4575  */
4576 static void kvm_machine_check(void)
4577 {
4578 #if defined(CONFIG_X86_MCE)
4579 struct pt_regs regs = {
4580 .cs = 3,
4581 .flags = X86_EFLAGS_IF,
4582 };
4583
4584 do_machine_check(&regs, 0);
4585 #endif
4586 }
4587
4588 static int handle_machine_check(struct kvm_vcpu *vcpu)
4589 {
4590 /* Machine checks are handled in the irqoff path right after VM-exit. */
4591 return 1;
4592 }
4593
4594 static int handle_exception_nmi(struct kvm_vcpu *vcpu)
4595 {
4596 struct vcpu_vmx *vmx = to_vmx(vcpu);
4597 struct kvm_run *kvm_run = vcpu->run;
4598 u32 intr_info, ex_no, error_code;
4599 unsigned long cr2, rip, dr6;
4600 u32 vect_info;
4601
4602 vect_info = vmx->idt_vectoring_info;
4603 intr_info = vmx->exit_intr_info;
4604
4605 if (is_machine_check(intr_info) || is_nmi(intr_info))
4606 return 1;
4607
4608 if (is_invalid_opcode(intr_info))
4609 return handle_ud(vcpu);
4610
4611 error_code = 0;
4612 if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
4613 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
4614
4615 if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
4616 WARN_ON_ONCE(!enable_vmware_backdoor);
4617
4618 /*
4619  * VMware backdoor emulation on #GP interception only handles IN{S},
4620  * OUT{S}, and RDPMC, none of which generate a non-zero error code
4621  * on #GP.
4622  */
4623 if (error_code) {
4624 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
4625 return 1;
4626 }
4627 return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
4628 }
4629
4630 /*
4631  * A #PF with PFEC.RSVD = 1 indicates the guest is accessing
4632  * MMIO, so it is better to report an internal error.
4633  * See the comments in vmx_handle_exit.
4634  */
4635 if ((vect_info & VECTORING_INFO_VALID_MASK) &&
4636 !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) {
4637 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
4638 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
4639 vcpu->run->internal.ndata = 3;
4640 vcpu->run->internal.data[0] = vect_info;
4641 vcpu->run->internal.data[1] = intr_info;
4642 vcpu->run->internal.data[2] = error_code;
4643 return 0;
4644 }
4645
4646 if (is_page_fault(intr_info)) {
4647 cr2 = vmcs_readl(EXIT_QUALIFICATION);
4648 /* EPT won't cause a page fault directly */
4649 WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept);
4650 return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0);
4651 }
4652
4653 ex_no = intr_info & INTR_INFO_VECTOR_MASK;
4654
4655 if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no))
4656 return handle_rmode_exception(vcpu, ex_no, error_code);
4657
4658 switch (ex_no) {
4659 case AC_VECTOR:
4660 kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
4661 return 1;
4662 case DB_VECTOR:
4663 dr6 = vmcs_readl(EXIT_QUALIFICATION);
4664 if (!(vcpu->guest_debug &
4665 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
4666 vcpu->arch.dr6 &= ~DR_TRAP_BITS;
4667 vcpu->arch.dr6 |= dr6 | DR6_RTM;
4668 if (is_icebp(intr_info))
4669 WARN_ON(!skip_emulated_instruction(vcpu));
4670
4671 kvm_queue_exception(vcpu, DB_VECTOR);
4672 return 1;
4673 }
4674 kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
4675 kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
4676 /* fall through */
4677 case BP_VECTOR:
4678 /*
4679  * Update instruction length as we may reinject #BP from
4680  * user space while in guest debugging mode. Reading it for
4681  * #DB as well causes no harm; it is not used in that case.
4682  */
4683 vmx->vcpu.arch.event_exit_inst_len =
4684 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
4685 kvm_run->exit_reason = KVM_EXIT_DEBUG;
4686 rip = kvm_rip_read(vcpu);
4687 kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
4688 kvm_run->debug.arch.exception = ex_no;
4689 break;
4690 default:
4691 kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
4692 kvm_run->ex.exception = ex_no;
4693 kvm_run->ex.error_code = error_code;
4694 break;
4695 }
4696 return 0;
4697 }
4698
4699 static int handle_external_interrupt(struct kvm_vcpu *vcpu)
4700 {
4701 ++vcpu->stat.irq_exits;
4702 return 1;
4703 }
4704
4705 static int handle_triple_fault(struct kvm_vcpu *vcpu)
4706 {
4707 vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
4708 vcpu->mmio_needed = 0;
4709 return 0;
4710 }
4711
4712 static int handle_io(struct kvm_vcpu *vcpu)
4713 {
4714 unsigned long exit_qualification;
4715 int size, in, string;
4716 unsigned port;
4717
4718 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
4719 string = (exit_qualification & 16) != 0;
4720
4721 ++vcpu->stat.io_exits;
4722
4723 if (string)
4724 return kvm_emulate_instruction(vcpu, 0);
4725
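/*
 * Decode the rest of the I/O exit qualification: bits 2:0 hold the
 * access size minus one, bit 3 the direction (1 = IN), and bits 31:16
 * the port number.
 */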
4726 port = exit_qualification >> 16;
4727 size = (exit_qualification & 7) + 1;
4728 in = (exit_qualification & 8) != 0;
4729
4730 return kvm_fast_pio(vcpu, size, port, in);
4731 }
4732
4733 static void
4734 vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
4735 {
4736 /*
4737  * Patch in the VMCALL instruction (0f 01 c1).
4738  */
4739 hypercall[0] = 0x0f;
4740 hypercall[1] = 0x01;
4741 hypercall[2] = 0xc1;
4742 }
4743
4744 /* Called to set cr0 as appropriate for a mov-to-cr0 exit. */
4745 static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
4746 {
4747 if (is_guest_mode(vcpu)) {
4748 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4749 unsigned long orig_val = val;
4750 /*
4751  * We get here when L2 changed cr0 in a way that did not change
4752  * any of L1's shadowed bits (see nested_vmx_exit_handled_cr),
4753  * but did change L0 shadowed bits. So we first calculate the
4754  * effective cr0 value that L1 would like to write into the
4755  * hardware. It consists of the L2-owned bits from the new
4756  * value combined with the L1-owned bits from L1's guest_cr0.
4757  */
4758
4759 val = (val & ~vmcs12->cr0_guest_host_mask) |
4760 (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
4761
4762 if (!nested_guest_cr0_valid(vcpu, val))
4763 return 1;
4764
4765 if (kvm_set_cr0(vcpu, val))
4766 return 1;
4767 vmcs_writel(CR0_READ_SHADOW, orig_val);
4768 return 0;
4769 } else {
4770 if (to_vmx(vcpu)->nested.vmxon &&
4771 !nested_host_cr0_valid(vcpu, val))
4772 return 1;
4773
4774 return kvm_set_cr0(vcpu, val);
4775 }
4776 }
4777
4778 static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
4779 {
4780 if (is_guest_mode(vcpu)) {
4781 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4782 unsigned long orig_val = val;
4783
4784 /* Analogously to handle_set_cr0. */
4785 val = (val & ~vmcs12->cr4_guest_host_mask) |
4786 (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask);
4787 if (kvm_set_cr4(vcpu, val))
4788 return 1;
4789 vmcs_writel(CR4_READ_SHADOW, orig_val);
4790 return 0;
4791 } else
4792 return kvm_set_cr4(vcpu, val);
4793 }
4794
4795 static int handle_desc(struct kvm_vcpu *vcpu)
4796 {
4797 WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
4798 return kvm_emulate_instruction(vcpu, 0);
4799 }
4800
4801 static int handle_cr(struct kvm_vcpu *vcpu)
4802 {
4803 unsigned long exit_qualification, val;
4804 int cr;
4805 int reg;
4806 int err;
4807 int ret;
4808
4809 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
4810 cr = exit_qualification & 15;
4811 reg = (exit_qualification >> 8) & 15;
4812 switch ((exit_qualification >> 4) & 3) {
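/*
 * Bits 5:4 of the exit qualification encode the access type:
 * 0 = MOV to CR, 1 = MOV from CR, 2 = CLTS, 3 = LMSW.
 */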
4813 case 0:
4814 val = kvm_register_readl(vcpu, reg);
4815 trace_kvm_cr_write(cr, val);
4816 switch (cr) {
4817 case 0:
4818 err = handle_set_cr0(vcpu, val);
4819 return kvm_complete_insn_gp(vcpu, err);
4820 case 3:
4821 WARN_ON_ONCE(enable_unrestricted_guest);
4822 err = kvm_set_cr3(vcpu, val);
4823 return kvm_complete_insn_gp(vcpu, err);
4824 case 4:
4825 err = handle_set_cr4(vcpu, val);
4826 return kvm_complete_insn_gp(vcpu, err);
4827 case 8: {
4828 u8 cr8_prev = kvm_get_cr8(vcpu);
4829 u8 cr8 = (u8)val;
4830 err = kvm_set_cr8(vcpu, cr8);
4831 ret = kvm_complete_insn_gp(vcpu, err);
4832 if (lapic_in_kernel(vcpu))
4833 return ret;
4834 if (cr8_prev <= cr8)
4835 return ret;
4836 /*
4837  * TODO: we might be squashing a
4838  * KVM_GUESTDBG_SINGLESTEP-triggered
4839  * KVM_EXIT_DEBUG here.
4840  */
4841 vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
4842 return 0;
4843 }
4844 }
4845 break;
4846 case 2:
4847 WARN_ONCE(1, "Guest should always own CR0.TS");
4848 vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
4849 trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
4850 return kvm_skip_emulated_instruction(vcpu);
4851 case 1:
4852 switch (cr) {
4853 case 3:
4854 WARN_ON_ONCE(enable_unrestricted_guest);
4855 val = kvm_read_cr3(vcpu);
4856 kvm_register_write(vcpu, reg, val);
4857 trace_kvm_cr_read(cr, val);
4858 return kvm_skip_emulated_instruction(vcpu);
4859 case 8:
4860 val = kvm_get_cr8(vcpu);
4861 kvm_register_write(vcpu, reg, val);
4862 trace_kvm_cr_read(cr, val);
4863 return kvm_skip_emulated_instruction(vcpu);
4864 }
4865 break;
4866 case 3:
4867 val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
4868 trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
4869 kvm_lmsw(vcpu, val);
4870
4871 return kvm_skip_emulated_instruction(vcpu);
4872 default:
4873 break;
4874 }
4875 vcpu->run->exit_reason = 0;
4876 vcpu_unimpl(vcpu, "unhandled control register: op %d cr %d\n",
4877 (int)(exit_qualification >> 4) & 3, cr);
4878 return 0;
4879 }
4880
4881 static int handle_dr(struct kvm_vcpu *vcpu)
4882 {
4883 unsigned long exit_qualification;
4884 int dr, dr7, reg;
4885
4886 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
4887 dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
4888
4889 /* First, if the DR does not exist, trigger #UD. */
4890 if (!kvm_require_dr(vcpu, dr))
4891 return 1;
4892
4893 /* Do not handle if CPL > 0; a #GP will be triggered on re-entry. */
4894 if (!kvm_require_cpl(vcpu, 0))
4895 return 1;
4896 dr7 = vmcs_readl(GUEST_DR7);
4897 if (dr7 & DR7_GD) {
4898 /*
4899  * As the VM-exit takes precedence over the debug trap, we
4900  * need to emulate the latter, either for the host or the
4901  * guest debugging itself.
4902  */
4903 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
4904 vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
4905 vcpu->run->debug.arch.dr7 = dr7;
4906 vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu);
4907 vcpu->run->debug.arch.exception = DB_VECTOR;
4908 vcpu->run->exit_reason = KVM_EXIT_DEBUG;
4909 return 0;
4910 } else {
4911 vcpu->arch.dr6 &= ~DR_TRAP_BITS;
4912 vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
4913 kvm_queue_exception(vcpu, DB_VECTOR);
4914 return 1;
4915 }
4916 }
4917
4918 if (vcpu->guest_debug == 0) {
4919 exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);
4920
4921 /*
4922  * No more DR vmexits; force a reload of the debug registers
4923  * and reenter on this instruction.  The next vmexit will
4924  * retrieve the full state of the debug registers.
4925  */
4926 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
4927 return 1;
4928 }
4929
4930 reg = DEBUG_REG_ACCESS_REG(exit_qualification);
4931 if (exit_qualification & TYPE_MOV_FROM_DR) {
4932 unsigned long val;
4933
4934 if (kvm_get_dr(vcpu, dr, &val))
4935 return 1;
4936 kvm_register_write(vcpu, reg, val);
4937 } else
4938 if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg)))
4939 return 1;
4940
4941 return kvm_skip_emulated_instruction(vcpu);
4942 }
4943
4944 static u64 vmx_get_dr6(struct kvm_vcpu *vcpu)
4945 {
4946 return vcpu->arch.dr6;
4947 }
4948
4949 static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
4950 {
4951 }
4952
4953 static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
4954 {
4955 get_debugreg(vcpu->arch.db[0], 0);
4956 get_debugreg(vcpu->arch.db[1], 1);
4957 get_debugreg(vcpu->arch.db[2], 2);
4958 get_debugreg(vcpu->arch.db[3], 3);
4959 get_debugreg(vcpu->arch.dr6, 6);
4960 vcpu->arch.dr7 = vmcs_readl(GUEST_DR7);
4961
4962 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
4963 exec_controls_setbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);
4964 }
4965
4966 static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
4967 {
4968 vmcs_writel(GUEST_DR7, val);
4969 }
4970
4971 static int handle_cpuid(struct kvm_vcpu *vcpu)
4972 {
4973 return kvm_emulate_cpuid(vcpu);
4974 }
4975
4976 static int handle_rdmsr(struct kvm_vcpu *vcpu)
4977 {
4978 return kvm_emulate_rdmsr(vcpu);
4979 }
4980
4981 static int handle_wrmsr(struct kvm_vcpu *vcpu)
4982 {
4983 return kvm_emulate_wrmsr(vcpu);
4984 }
4985
4986 static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
4987 {
4988 kvm_apic_update_ppr(vcpu);
4989 return 1;
4990 }
4991
4992 static int handle_interrupt_window(struct kvm_vcpu *vcpu)
4993 {
4994 exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING);
4995
4996 kvm_make_request(KVM_REQ_EVENT, vcpu);
4997
4998 ++vcpu->stat.irq_window_exits;
4999 return 1;
5000 }
5001
5002 static int handle_halt(struct kvm_vcpu *vcpu)
5003 {
5004 return kvm_emulate_halt(vcpu);
5005 }
5006
5007 static int handle_vmcall(struct kvm_vcpu *vcpu)
5008 {
5009 return kvm_emulate_hypercall(vcpu);
5010 }
5011
5012 static int handle_invd(struct kvm_vcpu *vcpu)
5013 {
5014 return kvm_emulate_instruction(vcpu, 0);
5015 }
5016
5017 static int handle_invlpg(struct kvm_vcpu *vcpu)
5018 {
5019 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5020
5021 kvm_mmu_invlpg(vcpu, exit_qualification);
5022 return kvm_skip_emulated_instruction(vcpu);
5023 }
5024
5025 static int handle_rdpmc(struct kvm_vcpu *vcpu)
5026 {
5027 int err;
5028
5029 err = kvm_rdpmc(vcpu);
5030 return kvm_complete_insn_gp(vcpu, err);
5031 }
5032
5033 static int handle_wbinvd(struct kvm_vcpu *vcpu)
5034 {
5035 return kvm_emulate_wbinvd(vcpu);
5036 }
5037
5038 static int handle_xsetbv(struct kvm_vcpu *vcpu)
5039 {
5040 u64 new_bv = kvm_read_edx_eax(vcpu);
5041 u32 index = kvm_rcx_read(vcpu);
5042
5043 if (kvm_set_xcr(vcpu, index, new_bv) == 0)
5044 return kvm_skip_emulated_instruction(vcpu);
5045 return 1;
5046 }
5047
5048 static int handle_apic_access(struct kvm_vcpu *vcpu)
5049 {
5050 if (likely(fasteoi)) {
5051 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5052 int access_type, offset;
5053
5054 access_type = exit_qualification & APIC_ACCESS_TYPE;
5055 offset = exit_qualification & APIC_ACCESS_OFFSET;
5056 /*
5057  * A sane guest uses MOV to write EOI and does not care about the
5058  * written value, so short-circuit that case and avoid heavyweight
5059  * instruction emulation.
5060  */
5061 if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) &&
5062 (offset == APIC_EOI)) {
5063 kvm_lapic_set_eoi(vcpu);
5064 return kvm_skip_emulated_instruction(vcpu);
5065 }
5066 }
5067 return kvm_emulate_instruction(vcpu, 0);
5068 }
5069
5070 static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
5071 {
5072 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5073 int vector = exit_qualification & 0xff;
5074
5075 /* EOI-induced VM exit is trap-like, so there is no need to adjust IP. */
5076 kvm_apic_set_eoi_accelerated(vcpu, vector);
5077 return 1;
5078 }
5079
5080 static int handle_apic_write(struct kvm_vcpu *vcpu)
5081 {
5082 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5083 u32 offset = exit_qualification & 0xfff;
5084
5085 /* APIC-write VM exit is trap-like, so there is no need to adjust IP. */
5086 kvm_apic_write_nodecode(vcpu, offset);
5087 return 1;
5088 }
5089
5090 static int handle_task_switch(struct kvm_vcpu *vcpu)
5091 {
5092 struct vcpu_vmx *vmx = to_vmx(vcpu);
5093 unsigned long exit_qualification;
5094 bool has_error_code = false;
5095 u32 error_code = 0;
5096 u16 tss_selector;
5097 int reason, type, idt_v, idt_index;
5098
5099 idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK);
5100 idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK);
5101 type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK);
5102
5103 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5104
5105 reason = (u32)exit_qualification >> 30;
5106 if (reason == TASK_SWITCH_GATE && idt_v) {
5107 switch (type) {
5108 case INTR_TYPE_NMI_INTR:
5109 vcpu->arch.nmi_injected = false;
5110 vmx_set_nmi_mask(vcpu, true);
5111 break;
5112 case INTR_TYPE_EXT_INTR:
5113 case INTR_TYPE_SOFT_INTR:
5114 kvm_clear_interrupt_queue(vcpu);
5115 break;
5116 case INTR_TYPE_HARD_EXCEPTION:
5117 if (vmx->idt_vectoring_info &
5118 VECTORING_INFO_DELIVER_CODE_MASK) {
5119 has_error_code = true;
5120 error_code =
5121 vmcs_read32(IDT_VECTORING_ERROR_CODE);
5122 }
5123 /* fall through */
5124 case INTR_TYPE_SOFT_EXCEPTION:
5125 kvm_clear_exception_queue(vcpu);
5126 break;
5127 default:
5128 break;
5129 }
5130 }
5131 tss_selector = exit_qualification;
5132
5133 if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION &&
5134 type != INTR_TYPE_EXT_INTR &&
5135 type != INTR_TYPE_NMI_INTR))
5136 WARN_ON(!skip_emulated_instruction(vcpu));
5137
5138 /*
5139  * TODO: What about debug traps on tss switch?
5140  *       Are we supposed to inject them and update dr6?
5141  */
5142 return kvm_task_switch(vcpu, tss_selector,
5143 type == INTR_TYPE_SOFT_INTR ? idt_index : -1,
5144 reason, has_error_code, error_code);
5145 }
5146
5147 static int handle_ept_violation(struct kvm_vcpu *vcpu)
5148 {
5149 unsigned long exit_qualification;
5150 gpa_t gpa;
5151 u64 error_code;
5152
5153 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5154
5155 /*
5156  * If the EPT violation happened while executing IRET from an NMI,
5157  * the "blocked by NMI" bit has to be set before the next VM entry.
5158  * There are CPU errata that may cause this bit to not be set by
5159  * hardware.
5160  */
5161 if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
5162 enable_vnmi &&
5163 (exit_qualification & INTR_INFO_UNBLOCK_NMI))
5164 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
5165
5166 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
5167 trace_kvm_page_fault(gpa, exit_qualification);
5168
5169 /* Translate the EPT-violation exit qualification into a page-fault error code. */
5170 error_code = (exit_qualification & EPT_VIOLATION_ACC_READ)
5171 ? PFERR_USER_MASK : 0;
5172
5173 error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE)
5174 ? PFERR_WRITE_MASK : 0;
5175
5176 error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR)
5177 ? PFERR_FETCH_MASK : 0;
5178
5179 error_code |= (exit_qualification &
5180 (EPT_VIOLATION_READABLE | EPT_VIOLATION_WRITABLE |
5181 EPT_VIOLATION_EXECUTABLE))
5182 ? PFERR_PRESENT_MASK : 0;
5183
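/*
 * Exit qualification bit 8 distinguishes a fault on the final
 * guest-physical access from a fault on a guest paging-structure
 * access.
 */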
5184 error_code |= (exit_qualification & 0x100) != 0 ?
5185 PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
5186
5187 vcpu->arch.exit_qualification = exit_qualification;
5188 return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
5189 }
5190
5191 static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
5192 {
5193 gpa_t gpa;
5194
5195 /*
5196  * A nested guest cannot optimize MMIO vmexits, because we have an
5197  * nGPA here instead of the required GPA.
5198  */
5199 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
5200 if (!is_guest_mode(vcpu) &&
5201 !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
5202 trace_kvm_fast_mmio(gpa);
5203 return kvm_skip_emulated_instruction(vcpu);
5204 }
5205
5206 return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
5207 }
5208
5209 static int handle_nmi_window(struct kvm_vcpu *vcpu)
5210 {
5211 WARN_ON_ONCE(!enable_vnmi);
5212 exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING);
5213 ++vcpu->stat.nmi_window_exits;
5214 kvm_make_request(KVM_REQ_EVENT, vcpu);
5215
5216 return 1;
5217 }
5218
5219 static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
5220 {
5221 struct vcpu_vmx *vmx = to_vmx(vcpu);
5222 bool intr_window_requested;
5223 unsigned count = 130;
5224
5225 /*
5226  * We should never reach here while emulating L2 due to invalid guest
5227  * state, as that would mean we incorrectly allowed a nested VM-entry;
5228  * L2 is protected against invalid guest state.
5229  */
5230 WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending);
5231
5232 intr_window_requested = exec_controls_get(vmx) &
5233 CPU_BASED_VIRTUAL_INTR_PENDING;
5234
5235 while (vmx->emulation_required && count-- != 0) {
5236 if (intr_window_requested && vmx_interrupt_allowed(vcpu))
5237 return handle_interrupt_window(&vmx->vcpu);
5238
5239 if (kvm_test_request(KVM_REQ_EVENT, vcpu))
5240 return 1;
5241
5242 if (!kvm_emulate_instruction(vcpu, 0))
5243 return 0;
5244
5245 if (vmx->emulation_required && !vmx->rmode.vm86_active &&
5246 vcpu->arch.exception.pending) {
5247 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
5248 vcpu->run->internal.suberror =
5249 KVM_INTERNAL_ERROR_EMULATION;
5250 vcpu->run->internal.ndata = 0;
5251 return 0;
5252 }
5253
5254 if (vcpu->arch.halt_request) {
5255 vcpu->arch.halt_request = 0;
5256 return kvm_vcpu_halt(vcpu);
5257 }
5258
5259 /*
5260  * Note, return 1 and not 0; vcpu_run() is responsible for
5261  * morphing the pending signal into the proper return code.
5262  */
5263 if (signal_pending(current))
5264 return 1;
5265
5266 if (need_resched())
5267 schedule();
5268 }
5269
5270 return 1;
5271 }
5272
5273 static void grow_ple_window(struct kvm_vcpu *vcpu)
5274 {
5275 struct vcpu_vmx *vmx = to_vmx(vcpu);
5276 unsigned int old = vmx->ple_window;
5277
5278 vmx->ple_window = __grow_ple_window(old, ple_window,
5279 ple_window_grow,
5280 ple_window_max);
5281
5282 if (vmx->ple_window != old) {
5283 vmx->ple_window_dirty = true;
5284 trace_kvm_ple_window_update(vcpu->vcpu_id,
5285 vmx->ple_window, old);
5286 }
5287 }
5288
5289 static void shrink_ple_window(struct kvm_vcpu *vcpu)
5290 {
5291 struct vcpu_vmx *vmx = to_vmx(vcpu);
5292 unsigned int old = vmx->ple_window;
5293
5294 vmx->ple_window = __shrink_ple_window(old, ple_window,
5295 ple_window_shrink,
5296 ple_window);
5297
5298 if (vmx->ple_window != old) {
5299 vmx->ple_window_dirty = true;
5300 trace_kvm_ple_window_update(vcpu->vcpu_id,
5301 vmx->ple_window, old);
5302 }
5303 }
5304
5305 /*
5306  * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
5307  */
5308 static void wakeup_handler(void)
5309 {
5310 struct kvm_vcpu *vcpu;
5311 int cpu = smp_processor_id();
5312
5313 spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
5314 list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
5315 blocked_vcpu_list) {
5316 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
5317
5318 if (pi_test_on(pi_desc) == 1)
5319 kvm_vcpu_kick(vcpu);
5320 }
5321 spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
5322 }
5323
5324 static void vmx_enable_tdp(void)
5325 {
5326 kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
5327 enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull,
5328 enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull,
5329 0ull, VMX_EPT_EXECUTABLE_MASK,
5330 cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK,
5331 VMX_EPT_RWX_MASK, 0ull);
5332
5333 ept_set_mmio_spte_mask();
5334 kvm_enable_tdp();
5335 }
5336
5337 /*
5338  * Indicate a busy-waiting vcpu in a spinlock. We do not enable PAUSE
5339  * exiting, so we only get here on a CPU with PAUSE-loop exiting.
5340  */
5341 static int handle_pause(struct kvm_vcpu *vcpu)
5342 {
5343 if (!kvm_pause_in_guest(vcpu->kvm))
5344 grow_ple_window(vcpu);
5345
5346 /*
5347  * The SDM states that the PAUSE-loop exiting control is ignored at
5348  * CPL > 0. KVM never sets plain PAUSE exiting, only PLE if it is
5349  * supported, so the vcpu must be at CPL 0 if it gets a PAUSE exit
5350  * here.
5351  */
5352 kvm_vcpu_on_spin(vcpu, true);
5353 return kvm_skip_emulated_instruction(vcpu);
5354 }
5355
5356 static int handle_nop(struct kvm_vcpu *vcpu)
5357 {
5358 return kvm_skip_emulated_instruction(vcpu);
5359 }
5360
5361 static int handle_mwait(struct kvm_vcpu *vcpu)
5362 {
5363 printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
5364 return handle_nop(vcpu);
5365 }
5366
5367 static int handle_invalid_op(struct kvm_vcpu *vcpu)
5368 {
5369 kvm_queue_exception(vcpu, UD_VECTOR);
5370 return 1;
5371 }
5372
5373 static int handle_monitor_trap(struct kvm_vcpu *vcpu)
5374 {
5375 return 1;
5376 }
5377
5378 static int handle_monitor(struct kvm_vcpu *vcpu)
5379 {
5380 printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
5381 return handle_nop(vcpu);
5382 }
5383
5384 static int handle_invpcid(struct kvm_vcpu *vcpu)
5385 {
5386 u32 vmx_instruction_info;
5387 unsigned long type;
5388 bool pcid_enabled;
5389 gva_t gva;
5390 struct x86_exception e;
5391 unsigned i;
5392 unsigned long roots_to_free = 0;
5393 struct {
5394 u64 pcid;
5395 u64 gla;
5396 } operand;
5397
5398 if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
5399 kvm_queue_exception(vcpu, UD_VECTOR);
5400 return 1;
5401 }
5402
5403 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5404 type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
5405
5406 if (type > 3) {
5407 kvm_inject_gp(vcpu, 0);
5408 return 1;
5409 }
5410
5411 /* Per the instruction reference, the memory operand is read
5412  * even if it isn't needed (e.g. for the all-contexts types),
5413  * so fetch and validate it unconditionally. */
5414 if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
5415 vmx_instruction_info, false,
5416 sizeof(operand), &gva))
5417 return 1;
5418
5419 if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
5420 kvm_inject_page_fault(vcpu, &e);
5421 return 1;
5422 }
5423
5424 if (operand.pcid >> 12 != 0) {
5425 kvm_inject_gp(vcpu, 0);
5426 return 1;
5427 }
5428
5429 pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
5430
5431 switch (type) {
5432 case INVPCID_TYPE_INDIV_ADDR:
5433 if ((!pcid_enabled && (operand.pcid != 0)) ||
5434 is_noncanonical_address(operand.gla, vcpu)) {
5435 kvm_inject_gp(vcpu, 0);
5436 return 1;
5437 }
5438 kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid);
5439 return kvm_skip_emulated_instruction(vcpu);
5440
5441 case INVPCID_TYPE_SINGLE_CTXT:
5442 if (!pcid_enabled && (operand.pcid != 0)) {
5443 kvm_inject_gp(vcpu, 0);
5444 return 1;
5445 }
5446
5447 if (kvm_get_active_pcid(vcpu) == operand.pcid) {
5448 kvm_mmu_sync_roots(vcpu);
5449 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
5450 }
5451
5452 for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
5453 if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].cr3)
5454 == operand.pcid)
5455 roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
5456
5457 kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free);
5458
5459 /*
5460  * If neither the current cr3 nor any of the prev_roots use the
5461  * given PCID, then nothing needs to be done here because a
5462  * resync will happen anyway before switching to any other CR3.
5463  */
5464 return kvm_skip_emulated_instruction(vcpu);
5465
5466 case INVPCID_TYPE_ALL_NON_GLOBAL:
5467 /*
5468  * Currently, KVM doesn't mark global entries in the shadow
5469  * page tables, so a non-global flush just degenerates to a
5470  * global flush. If needed, we could optimize this later by
5471  * keeping track of global entries in shadow page tables.
5472  */
5473
5474 /* fall through */
5475 case INVPCID_TYPE_ALL_INCL_GLOBAL:
5476 kvm_mmu_unload(vcpu);
5477 return kvm_skip_emulated_instruction(vcpu);
5478
5479 default:
5480 BUG();
5481 }
5482 }
5483
5484 static int handle_pml_full(struct kvm_vcpu *vcpu)
5485 {
5486 unsigned long exit_qualification;
5487
5488 trace_kvm_pml_full(vcpu->vcpu_id);
5489
5490 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5491
5492 /*
5493  * If the PML-buffer-full exit happened while executing IRET from an
5494  * NMI, the "blocked by NMI" bit has to be set before the next VM entry.
5495  */
5496 if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
5497 enable_vnmi &&
5498 (exit_qualification & INTR_INFO_UNBLOCK_NMI))
5499 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
5500 GUEST_INTR_STATE_NMI);
5501
5502 /*
5503  * The PML buffer was already flushed at the beginning of the VM-exit.
5504  * Nothing to do here, and no userspace involvement is needed for PML.
5505  */
5506 return 1;
5507 }
5508
5509 static int handle_preemption_timer(struct kvm_vcpu *vcpu)
5510 {
5511 struct vcpu_vmx *vmx = to_vmx(vcpu);
5512
5513 if (!vmx->req_immediate_exit &&
5514 !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled))
5515 kvm_lapic_expired_hv_timer(vcpu);
5516
5517 return 1;
5518 }
5519
5520 /*
5521  * When nested=0, all VMX instruction VM exits filter here.  The
5522  * handlers are overwritten by the nested setup code when nested=1.
5523  */
5524 static int handle_vmx_instruction(struct kvm_vcpu *vcpu)
5525 {
5526 kvm_queue_exception(vcpu, UD_VECTOR);
5527 return 1;
5528 }
5529
5530 static int handle_encls(struct kvm_vcpu *vcpu)
5531 {
5532 /*
5533  * SGX virtualization is not yet supported.  There is no software
5534  * enable bit for SGX, so we have to trap ENCLS and inject a #UD
5535  * to prevent ENCLS from executing.
5536  */
5537 kvm_queue_exception(vcpu, UD_VECTOR);
5538 return 1;
5539 }
5540
5541 /*
5542  * The exit handlers return 1 if the exit was handled fully and guest
5543  * execution may resume.  Otherwise they set the kvm_run parameter to
5544  * indicate what needs to be done to userspace and return 0.
5545  */
5546 static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
5547 [EXIT_REASON_EXCEPTION_NMI] = handle_exception_nmi,
5548 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
5549 [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault,
5550 [EXIT_REASON_NMI_WINDOW] = handle_nmi_window,
5551 [EXIT_REASON_IO_INSTRUCTION] = handle_io,
5552 [EXIT_REASON_CR_ACCESS] = handle_cr,
5553 [EXIT_REASON_DR_ACCESS] = handle_dr,
5554 [EXIT_REASON_CPUID] = handle_cpuid,
5555 [EXIT_REASON_MSR_READ] = handle_rdmsr,
5556 [EXIT_REASON_MSR_WRITE] = handle_wrmsr,
5557 [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window,
5558 [EXIT_REASON_HLT] = handle_halt,
5559 [EXIT_REASON_INVD] = handle_invd,
5560 [EXIT_REASON_INVLPG] = handle_invlpg,
5561 [EXIT_REASON_RDPMC] = handle_rdpmc,
5562 [EXIT_REASON_VMCALL] = handle_vmcall,
5563 [EXIT_REASON_VMCLEAR] = handle_vmx_instruction,
5564 [EXIT_REASON_VMLAUNCH] = handle_vmx_instruction,
5565 [EXIT_REASON_VMPTRLD] = handle_vmx_instruction,
5566 [EXIT_REASON_VMPTRST] = handle_vmx_instruction,
5567 [EXIT_REASON_VMREAD] = handle_vmx_instruction,
5568 [EXIT_REASON_VMRESUME] = handle_vmx_instruction,
5569 [EXIT_REASON_VMWRITE] = handle_vmx_instruction,
5570 [EXIT_REASON_VMOFF] = handle_vmx_instruction,
5571 [EXIT_REASON_VMON] = handle_vmx_instruction,
5572 [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
5573 [EXIT_REASON_APIC_ACCESS] = handle_apic_access,
5574 [EXIT_REASON_APIC_WRITE] = handle_apic_write,
5575 [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced,
5576 [EXIT_REASON_WBINVD] = handle_wbinvd,
5577 [EXIT_REASON_XSETBV] = handle_xsetbv,
5578 [EXIT_REASON_TASK_SWITCH] = handle_task_switch,
5579 [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
5580 [EXIT_REASON_GDTR_IDTR] = handle_desc,
5581 [EXIT_REASON_LDTR_TR] = handle_desc,
5582 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
5583 [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
5584 [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
5585 [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait,
5586 [EXIT_REASON_MONITOR_TRAP_FLAG] = handle_monitor_trap,
5587 [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor,
5588 [EXIT_REASON_INVEPT] = handle_vmx_instruction,
5589 [EXIT_REASON_INVVPID] = handle_vmx_instruction,
5590 [EXIT_REASON_RDRAND] = handle_invalid_op,
5591 [EXIT_REASON_RDSEED] = handle_invalid_op,
5592 [EXIT_REASON_PML_FULL] = handle_pml_full,
5593 [EXIT_REASON_INVPCID] = handle_invpcid,
5594 [EXIT_REASON_VMFUNC] = handle_vmx_instruction,
5595 [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer,
5596 [EXIT_REASON_ENCLS] = handle_encls,
5597 };
5598
5599 static const int kvm_vmx_max_exit_handlers =
5600 ARRAY_SIZE(kvm_vmx_exit_handlers);
5601
5602 static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
5603 {
5604 *info1 = vmcs_readl(EXIT_QUALIFICATION);
5605 *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
5606 }
5607
5608 static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
5609 {
5610 if (vmx->pml_pg) {
5611 __free_page(vmx->pml_pg);
5612 vmx->pml_pg = NULL;
5613 }
5614 }
5615
5616 static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
5617 {
5618 struct vcpu_vmx *vmx = to_vmx(vcpu);
5619 u64 *pml_buf;
5620 u16 pml_idx;
5621
5622 pml_idx = vmcs_read16(GUEST_PML_INDEX);
5623
5624 /* Do nothing if the PML buffer is empty. */
5625 if (pml_idx == (PML_ENTITY_NUM - 1))
5626 return;
5627
5628 /* The PML index always points to the next available PML buffer entry. */
5629 if (pml_idx >= PML_ENTITY_NUM)
5630 pml_idx = 0;
5631 else
5632 pml_idx++;
5633
5634 pml_buf = page_address(vmx->pml_pg);
5635 for (; pml_idx < PML_ENTITY_NUM; pml_idx++) {
5636 u64 gpa;
5637
5638 gpa = pml_buf[pml_idx];
5639 WARN_ON(gpa & (PAGE_SIZE - 1));
5640 kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
5641 }
5642
5643 /* Reset the PML index. */
5644 vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
5645 }
5646
5647 /*
5648  * Flush all vCPUs' PML buffers and update logged GPAs in the dirty
5649  * bitmap by kicking every vCPU out of guest mode.
5650  */
5651 static void kvm_flush_pml_buffers(struct kvm *kvm)
5652 {
5653 int i;
5654 struct kvm_vcpu *vcpu;
5655
5656 /*
5657  * We only need to kick each vcpu out of guest mode here: the PML
5658  * buffer is flushed at the beginning of every VM-exit, so only vcpus
5659  * currently running in guest mode can have unflushed GPAs.
5660  */
5661 kvm_for_each_vcpu(i, vcpu, kvm)
5662 kvm_vcpu_kick(vcpu);
5663 }
5664
5665 static void vmx_dump_sel(char *name, uint32_t sel)
5666 {
5667 pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n",
5668 name, vmcs_read16(sel),
5669 vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR),
5670 vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR),
5671 vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR));
5672 }
5673
5674 static void vmx_dump_dtsel(char *name, uint32_t limit)
5675 {
5676 pr_err("%s limit=0x%08x, base=0x%016lx\n",
5677 name, vmcs_read32(limit),
5678 vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
5679 }
5680
5681 void dump_vmcs(void)
5682 {
5683 u32 vmentry_ctl, vmexit_ctl;
5684 u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control;
5685 unsigned long cr4;
5686 u64 efer;
5687 int i, n;
5688
5689 if (!dump_invalid_vmcs) {
5690 pr_warn_ratelimited("set kvm_intel.dump_invalid_vmcs=1 to dump internal KVM state.\n");
5691 return;
5692 }
5693
5694 vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
5695 vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
5696 cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
5697 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
5698 cr4 = vmcs_readl(GUEST_CR4);
5699 efer = vmcs_read64(GUEST_IA32_EFER);
5700 secondary_exec_control = 0;
5701 if (cpu_has_secondary_exec_ctrls())
5702 secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
5703
5704 pr_err("*** Guest State ***\n");
5705 pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
5706 vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW),
5707 vmcs_readl(CR0_GUEST_HOST_MASK));
5708 pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
5709 cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK));
5710 pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3));
5711 if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) &&
5712 (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA))
5713 {
5714 pr_err("PDPTR0 = 0x%016llx PDPTR1 = 0x%016llx\n",
5715 vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1));
5716 pr_err("PDPTR2 = 0x%016llx PDPTR3 = 0x%016llx\n",
5717 vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3));
5718 }
5719 pr_err("RSP = 0x%016lx RIP = 0x%016lx\n",
5720 vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP));
5721 pr_err("RFLAGS=0x%08lx DR7 = 0x%016lx\n",
5722 vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7));
5723 pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
5724 vmcs_readl(GUEST_SYSENTER_ESP),
5725 vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP));
5726 vmx_dump_sel("CS: ", GUEST_CS_SELECTOR);
5727 vmx_dump_sel("DS: ", GUEST_DS_SELECTOR);
5728 vmx_dump_sel("SS: ", GUEST_SS_SELECTOR);
5729 vmx_dump_sel("ES: ", GUEST_ES_SELECTOR);
5730 vmx_dump_sel("FS: ", GUEST_FS_SELECTOR);
5731 vmx_dump_sel("GS: ", GUEST_GS_SELECTOR);
5732 vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT);
5733 vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR);
5734 vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT);
5735 vmx_dump_sel("TR: ", GUEST_TR_SELECTOR);
5736 if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) ||
5737 (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER)))
5738 pr_err("EFER = 0x%016llx PAT = 0x%016llx\n",
5739 efer, vmcs_read64(GUEST_IA32_PAT));
5740 pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n",
5741 vmcs_read64(GUEST_IA32_DEBUGCTL),
5742 vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS));
5743 if (cpu_has_load_perf_global_ctrl() &&
5744 vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
5745 pr_err("PerfGlobCtl = 0x%016llx\n",
5746 vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL));
5747 if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
5748 pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS));
5749 pr_err("Interruptibility = %08x ActivityState = %08x\n",
5750 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO),
5751 vmcs_read32(GUEST_ACTIVITY_STATE));
5752 if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
5753 pr_err("InterruptStatus = %04x\n",
5754 vmcs_read16(GUEST_INTR_STATUS));
5755
5756 pr_err("*** Host State ***\n");
5757 pr_err("RIP = 0x%016lx RSP = 0x%016lx\n",
5758 vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
5759 pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n",
5760 vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR),
5761 vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR),
5762 vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR),
5763 vmcs_read16(HOST_TR_SELECTOR));
5764 pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n",
5765 vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE),
5766 vmcs_readl(HOST_TR_BASE));
5767 pr_err("GDTBase=%016lx IDTBase=%016lx\n",
5768 vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE));
5769 pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n",
5770 vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3),
5771 vmcs_readl(HOST_CR4));
5772 pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
5773 vmcs_readl(HOST_IA32_SYSENTER_ESP),
5774 vmcs_read32(HOST_IA32_SYSENTER_CS),
5775 vmcs_readl(HOST_IA32_SYSENTER_EIP));
5776 if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER))
5777 pr_err("EFER = 0x%016llx PAT = 0x%016llx\n",
5778 vmcs_read64(HOST_IA32_EFER),
5779 vmcs_read64(HOST_IA32_PAT));
5780 if (cpu_has_load_perf_global_ctrl() &&
5781 vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
5782 pr_err("PerfGlobCtl = 0x%016llx\n",
5783 vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL));
5784
5785 pr_err("*** Control State ***\n");
5786 pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
5787 pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control);
5788 pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl);
5789 pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n",
5790 vmcs_read32(EXCEPTION_BITMAP),
5791 vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK),
5792 vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH));
5793 pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n",
5794 vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
5795 vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE),
5796 vmcs_read32(VM_ENTRY_INSTRUCTION_LEN));
5797 pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n",
5798 vmcs_read32(VM_EXIT_INTR_INFO),
5799 vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
5800 vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
5801 pr_err(" reason=%08x qualification=%016lx\n",
5802 vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION));
5803 pr_err("IDTVectoring: info=%08x errcode=%08x\n",
5804 vmcs_read32(IDT_VECTORING_INFO_FIELD),
5805 vmcs_read32(IDT_VECTORING_ERROR_CODE));
5806 pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET));
5807 if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING)
5808 pr_err("TSC Multiplier = 0x%016llx\n",
5809 vmcs_read64(TSC_MULTIPLIER));
5810 if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) {
5811 if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
5812 u16 status = vmcs_read16(GUEST_INTR_STATUS);
5813 pr_err("SVI|RVI = %02x|%02x ", status >> 8, status & 0xff);
5814 }
5815 pr_cont("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
5816 if (secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
5817 pr_err("APIC-access addr = 0x%016llx ", vmcs_read64(APIC_ACCESS_ADDR));
5818 pr_cont("virt-APIC addr = 0x%016llx\n", vmcs_read64(VIRTUAL_APIC_PAGE_ADDR));
5819 }
5820 if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR)
5821 pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
5822 if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
5823 pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER));
5824 n = vmcs_read32(CR3_TARGET_COUNT);
5825 for (i = 0; i + 1 < n; i += 4)
5826 pr_err("CR3 target%u=%016lx target%u=%016lx\n",
5827 i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2),
5828 i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2));
5829 if (i < n)
5830 pr_err("CR3 target%u=%016lx\n",
5831 i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2));
5832 if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
5833 pr_err("PLE Gap=%08x Window=%08x\n",
5834 vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW));
5835 if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
5836 pr_err("Virtual processor ID = 0x%04x\n",
5837 vmcs_read16(VIRTUAL_PROCESSOR_ID));
5838 }
5839
5840 /*
5841  * The guest has exited.  See if we can fix it or if we need userspace
5842  * assistance.
5843  */
5844 static int vmx_handle_exit(struct kvm_vcpu *vcpu)
5845 {
5846 struct vcpu_vmx *vmx = to_vmx(vcpu);
5847 u32 exit_reason = vmx->exit_reason;
5848 u32 vectoring_info = vmx->idt_vectoring_info;
5849
5850 trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
5851
5852 /*
5853  * Flush logged GPAs from the PML buffer so dirty_bitmap is up to
5854  * date.  This also means kvm_vm_ioctl_get_dirty_log() only needs to
5855  * kick vcpus out of guest mode before querying dirty_bitmap: once a
5856  * vcpu is back in root mode its PML buffer has already been flushed
5857  * here.
5858  */
5859 if (enable_pml)
5860 vmx_flush_pml_buffer(vcpu);
5861
5862 /* If guest state is invalid, start emulating. */
5863 if (vmx->emulation_required)
5864 return handle_invalid_guest_state(vcpu);
5865
5866 if (is_guest_mode(vcpu) && nested_vmx_exit_reflected(vcpu, exit_reason))
5867 return nested_vmx_reflect_vmexit(vcpu, exit_reason);
5868
5869 if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
5870 dump_vmcs();
5871 vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
5872 vcpu->run->fail_entry.hardware_entry_failure_reason
5873 = exit_reason;
5874 return 0;
5875 }
5876
5877 if (unlikely(vmx->fail)) {
5878 dump_vmcs();
5879 vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
5880 vcpu->run->fail_entry.hardware_entry_failure_reason
5881 = vmcs_read32(VM_INSTRUCTION_ERROR);
5882 return 0;
5883 }
5884
5885 /*
5886  * Note:
5887  * Do not try to fix EXIT_REASON_EPT_MISCONFIG if it is caused by a
5888  * delivery event, since that indicates the guest is accessing MMIO.
5889  * The VM-exit can be triggered again after returning to the guest,
5890  * which would cause an infinite loop.
5891  */
5892 if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
5893 (exit_reason != EXIT_REASON_EXCEPTION_NMI &&
5894 exit_reason != EXIT_REASON_EPT_VIOLATION &&
5895 exit_reason != EXIT_REASON_PML_FULL &&
5896 exit_reason != EXIT_REASON_TASK_SWITCH)) {
5897 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
5898 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
5899 vcpu->run->internal.ndata = 3;
5900 vcpu->run->internal.data[0] = vectoring_info;
5901 vcpu->run->internal.data[1] = exit_reason;
5902 vcpu->run->internal.data[2] = vcpu->arch.exit_qualification;
5903 if (exit_reason == EXIT_REASON_EPT_MISCONFIG) {
5904 vcpu->run->internal.ndata++;
5905 vcpu->run->internal.data[3] =
5906 vmcs_read64(GUEST_PHYSICAL_ADDRESS);
5907 }
5908 return 0;
5909 }
5910
5911 if (unlikely(!enable_vnmi &&
5912 vmx->loaded_vmcs->soft_vnmi_blocked)) {
5913 if (vmx_interrupt_allowed(vcpu)) {
5914 vmx->loaded_vmcs->soft_vnmi_blocked = 0;
5915 } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
5916 vcpu->arch.nmi_pending) {
5917 /*
5918  * This CPU doesn't support us in finding the end of an
5919  * NMI-blocked window if the guest runs with IRQs disabled.
5920  * So we pull the trigger after 1 s of futile waiting, but
5921  * inform the user about this.
5922  */
5923 printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
5924 "state on VCPU %d after 1 s timeout\n",
5925 __func__, vcpu->vcpu_id);
5926 vmx->loaded_vmcs->soft_vnmi_blocked = 0;
5927 }
5928 }
5929
5930 if (exit_reason < kvm_vmx_max_exit_handlers
5931 && kvm_vmx_exit_handlers[exit_reason])
5932 return kvm_vmx_exit_handlers[exit_reason](vcpu);
5933 else {
5934 vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
5935 exit_reason);
5936 dump_vmcs();
5937 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
5938 vcpu->run->internal.suberror =
5939 KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
5940 vcpu->run->internal.ndata = 1;
5941 vcpu->run->internal.data[0] = exit_reason;
5942 return 0;
5943 }
5944 }
5945
5946 /*
5947  * Software based L1D cache flush which is used when microcode providing
5948  * the cache control MSR is not loaded.
5949  *
5950  * The L1D cache is 32 KiB on Nehalem and later microarchitectures, but to
5951  * flush it is required to read in 64 KiB because the replacement algorithm
5952  * is not exactly LRU. This could be sized at runtime via topology
5953  * information but as all relevant affected CPUs have 32 KiB L1D cache size
5954  * there is no point in doing so.
5955  */
5956 static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
5957 {
5958 int size = PAGE_SIZE << L1D_CACHE_ORDER;
5959
5960 /*
5961  * This code is only executed when the flush mode is 'cond' or
5962  * 'always'.
5963  */
5964 if (static_branch_likely(&vmx_l1d_flush_cond)) {
5965 bool flush_l1d;
5966
5967 /*
5968  * Clear the per-vcpu flush bit; it gets set again
5969  * either from vcpu_run() or from one of the unsafe
5970  * VM-exit handlers.
5971  */
5972 flush_l1d = vcpu->arch.l1tf_flush_l1d;
5973 vcpu->arch.l1tf_flush_l1d = false;
5974
5975 /*
5976  * Clear the per-cpu flush bit; it gets set again from
5977  * the interrupt handlers.
5978  */
5979 flush_l1d |= kvm_get_cpu_l1tf_flush_l1d();
5980 kvm_clear_cpu_l1tf_flush_l1d();
5981
5982 if (!flush_l1d)
5983 return;
5984 }
5985
5986 vcpu->stat.l1d_flush++;
5987
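/*
 * Prefer the architectural flush via the IA32_FLUSH_CMD MSR when the
 * CPU advertises it; otherwise fall back to the software fill loop.
 */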
5988 if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
5989 wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
5990 return;
5991 }
5992
5993 asm volatile(
5994 /* First ensure the pages are in the TLB */
5995 "xorl %%eax, %%eax\n"
5996 ".Lpopulate_tlb:\n\t"
5997 "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
5998 "addl $4096, %%eax\n\t"
5999 "cmpl %%eax, %[size]\n\t"
6000 "jne .Lpopulate_tlb\n\t"
6001 "xorl %%eax, %%eax\n\t"
6002 "cpuid\n\t"
6003 /* Now fill the cache */
6004 "xorl %%eax, %%eax\n"
6005 ".Lfill_cache:\n"
6006 "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
6007 "addl $64, %%eax\n\t"
6008 "cmpl %%eax, %[size]\n\t"
6009 "jne .Lfill_cache\n\t"
6010 "lfence\n"
6011 :: [flush_pages] "r" (vmx_l1d_flush_pages),
6012 [size] "r" (size)
6013 : "eax", "ebx", "ecx", "edx");
6014 }
6015
6016 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
6017 {
6018 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
6019
6020 if (is_guest_mode(vcpu) &&
6021 nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
6022 return;
6023
6024 if (irr == -1 || tpr < irr) {
6025 vmcs_write32(TPR_THRESHOLD, 0);
6026 return;
6027 }
6028
6029 vmcs_write32(TPR_THRESHOLD, irr);
6030 }
6031
6032 void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
6033 {
6034 struct vcpu_vmx *vmx = to_vmx(vcpu);
6035 u32 sec_exec_control;
6036
6037 if (!lapic_in_kernel(vcpu))
6038 return;
6039
6040 if (!flexpriority_enabled &&
6041 !cpu_has_vmx_virtualize_x2apic_mode())
6042 return;
6043
6044 /* Postpone execution until vmcs01 is the current VMCS. */
6045 if (is_guest_mode(vcpu)) {
6046 vmx->nested.change_vmcs01_virtual_apic_mode = true;
6047 return;
6048 }
6049
6050 sec_exec_control = secondary_exec_controls_get(vmx);
6051 sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
6052 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
6053
6054 switch (kvm_get_apic_mode(vcpu)) {
6055 case LAPIC_MODE_INVALID:
6056 WARN_ONCE(true, "Invalid local APIC state");
6057 case LAPIC_MODE_DISABLED:
6058 break;
6059 case LAPIC_MODE_XAPIC:
6060 if (flexpriority_enabled) {
6061 sec_exec_control |=
6062 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
6063 vmx_flush_tlb(vcpu, true);
6064 }
6065 break;
6066 case LAPIC_MODE_X2APIC:
6067 if (cpu_has_vmx_virtualize_x2apic_mode())
6068 sec_exec_control |=
6069 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
6070 break;
6071 }
6072 secondary_exec_controls_set(vmx, sec_exec_control);
6073
6074 vmx_update_msr_bitmap(vcpu);
6075 }
6076
6077 static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
6078 {
6079 if (!is_guest_mode(vcpu)) {
6080 vmcs_write64(APIC_ACCESS_ADDR, hpa);
6081 vmx_flush_tlb(vcpu, true);
6082 }
6083 }
6084
6085 static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
6086 {
6087 u16 status;
6088 u8 old;
6089
6090 if (max_isr == -1)
6091 max_isr = 0;
6092
6093 status = vmcs_read16(GUEST_INTR_STATUS);
6094 old = status >> 8;
6095 if (max_isr != old) {
6096 status &= 0xff;
6097 status |= max_isr << 8;
6098 vmcs_write16(GUEST_INTR_STATUS, status);
6099 }
6100 }
6101
6102 static void vmx_set_rvi(int vector)
6103 {
6104 u16 status;
6105 u8 old;
6106
6107 if (vector == -1)
6108 vector = 0;
6109
6110 status = vmcs_read16(GUEST_INTR_STATUS);
6111 old = (u8)status & 0xff;
6112 if ((u8)vector != old) {
6113 status &= ~0xff;
6114 status |= (u8)vector;
6115 vmcs_write16(GUEST_INTR_STATUS, status);
6116 }
6117 }
6118
6119 static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
6120 {
6121 /*
6122  * When running L2, updating RVI is only relevant when
6123  * vmcs12 virtual-interrupt-delivery is enabled.
6124  * However, it can be enabled only when L1 also
6125  * intercepts external interrupts and in that case
6126  * we should not update vmcs02 RVI but instead intercept
6127  * the interrupt. Therefore, do nothing when running L2.
6128  */
6129 if (!is_guest_mode(vcpu))
6130 vmx_set_rvi(max_irr);
6131 }
6132
6133 static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
6134 {
6135 struct vcpu_vmx *vmx = to_vmx(vcpu);
6136 int max_irr;
6137 bool max_irr_updated;
6138
6139 WARN_ON(!vcpu->arch.apicv_active);
6140 if (pi_test_on(&vmx->pi_desc)) {
6141 pi_clear_on(&vmx->pi_desc);
6142 /*
6143  * The IOMMU can write to PIR.ON, so the barrier matters even on UP.
6144  * But on x86 this is just a compiler barrier anyway.
6145  */
6146 smp_mb__after_atomic();
6147 max_irr_updated =
6148 kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr);
6149
6150 /*
6151  * If we are running L2 and L1 has a new pending interrupt
6152  * which can be injected, we should re-evaluate
6153  * what should be done with this new L1 interrupt.
6154  * If L1 intercepts external interrupts, we should
6155  * exit from L2 to L1. Otherwise, the interrupt should be
6156  * delivered directly to L2.
6157  */
6158 if (is_guest_mode(vcpu) && max_irr_updated) {
6159 if (nested_exit_on_intr(vcpu))
6160 kvm_vcpu_exiting_guest_mode(vcpu);
6161 else
6162 kvm_make_request(KVM_REQ_EVENT, vcpu);
6163 }
6164 } else {
6165 max_irr = kvm_lapic_find_highest_irr(vcpu);
6166 }
6167 vmx_hwapic_irr_update(vcpu, max_irr);
6168 return max_irr;
6169 }
6170
6171 static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
6172 {
6173 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
6174
6175 return pi_test_on(pi_desc) ||
6176 (pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc));
6177 }
6178
6179 static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
6180 {
6181 if (!kvm_vcpu_apicv_active(vcpu))
6182 return;
6183
6184 vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
6185 vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
6186 vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
6187 vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
6188 }
6189
6190 static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
6191 {
6192 struct vcpu_vmx *vmx = to_vmx(vcpu);
6193
6194 pi_clear_on(&vmx->pi_desc);
6195 memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
6196 }
6197
6198 static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
6199 {
6200 vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
6201
6202
6203 /* If the exit is due to a #PF, check for async page fault. */
6204 vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
6205
6206 /* Handle machine checks before interrupts are enabled. */
6207 if (is_machine_check(vmx->exit_intr_info))
6208 kvm_machine_check();
6209
6210 /* We need to handle NMIs before interrupts are enabled */
6211 if (is_nmi(vmx->exit_intr_info)) {
6212 kvm_before_interrupt(&vmx->vcpu);
6213 asm("int $2");
6214 kvm_after_interrupt(&vmx->vcpu);
6215 }
6216 }
6217
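/*
 * With "acknowledge interrupt on exit" the CPU has already acknowledged
 * the external interrupt that caused this VM-exit, so the host IRQ
 * handler never ran. Recover the vector from VM_EXIT_INTR_INFO, build an
 * interrupt-style stack frame and call the handler through the host IDT
 * gate, just as the hardware would have done.
 */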
6218 static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
6219 {
6220 unsigned int vector;
6221 unsigned long entry;
6222 #ifdef CONFIG_X86_64
6223 unsigned long tmp;
6224 #endif
6225 gate_desc *desc;
6226 u32 intr_info;
6227
6228 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
6229 if (WARN_ONCE(!is_external_intr(intr_info),
6230 "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
6231 return;
6232
6233 vector = intr_info & INTR_INFO_VECTOR_MASK;
6234 desc = (gate_desc *)host_idt_base + vector;
6235 entry = gate_offset(desc);
6236
6237 kvm_before_interrupt(vcpu);
6238
6239 asm volatile(
6240 #ifdef CONFIG_X86_64
6241 "mov %%" _ASM_SP ", %[sp]\n\t"
6242 "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
6243 "push $%c[ss]\n\t"
6244 "push %[sp]\n\t"
6245 #endif
6246 "pushf\n\t"
6247 __ASM_SIZE(push) " $%c[cs]\n\t"
6248 CALL_NOSPEC
6249 :
6250 #ifdef CONFIG_X86_64
6251 [sp]"=&r"(tmp),
6252 #endif
6253 ASM_CALL_CONSTRAINT
6254 :
6255 THUNK_TARGET(entry),
6256 [ss]"i"(__KERNEL_DS),
6257 [cs]"i"(__KERNEL_CS)
6258 );
6259
6260 kvm_after_interrupt(vcpu);
6261 }
6262 STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
6263
6264 static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
6265 {
6266 struct vcpu_vmx *vmx = to_vmx(vcpu);
6267
6268 if (vmx->exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
6269 handle_external_interrupt_irqoff(vcpu);
6270 else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
6271 handle_exception_nmi_irqoff(vmx);
6272 }
6273
6274 static bool vmx_has_emulated_msr(int index)
6275 {
6276 switch (index) {
6277 case MSR_IA32_SMBASE:
6278 /*
6279  * We cannot do SMM unless we can run the guest in big
6280  * real mode.
6281  */
6282 return enable_unrestricted_guest || emulate_invalid_guest_state;
6283 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
6284 return nested;
6285 case MSR_AMD64_VIRT_SPEC_CTRL:
6286 /* This is AMD only. */
6287 return false;
6288 default:
6289 return true;
6290 }
6291 }
6292
6293 static bool vmx_pt_supported(void)
6294 {
6295 return pt_mode == PT_MODE_HOST_GUEST;
6296 }
6297
6298 static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
6299 {
6300 u32 exit_intr_info;
6301 bool unblock_nmi;
6302 u8 vector;
6303 bool idtv_info_valid;
6304
6305 idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
6306
6307 if (enable_vnmi) {
6308 if (vmx->loaded_vmcs->nmi_known_unmasked)
6309 return;
6310 /*
6311  * Can't use vmx->exit_intr_info since we're not sure what
6312  * the exit reason is.
6313  */
6314 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
6315 unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
6316 vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
6317 /*
6318  * SDM 3: 27.7.1.2 (September 2008)
6319  * Re-set bit "block by NMI" before VM entry if vmexit caused by
6320  * a guest IRET fault.
6321  * SDM 3: 23.2.2 (September 2008)
6322  * Bit 12 is undefined in any of the following cases:
6323  *  If the VM exit sets the valid bit in the IDT-vectoring
6324  *   information field.
6325  *  If the VM exit is due to double fault.
6326  */
6327 if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
6328 vector != DF_VECTOR && !idtv_info_valid)
6329 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
6330 GUEST_INTR_STATE_NMI);
6331 else
6332 vmx->loaded_vmcs->nmi_known_unmasked =
6333 !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
6334 & GUEST_INTR_STATE_NMI);
6335 } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
6336 vmx->loaded_vmcs->vnmi_blocked_time +=
6337 ktime_to_ns(ktime_sub(ktime_get(),
6338 vmx->loaded_vmcs->entry_time));
6339 }
6340
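/*
 * Re-queue an event whose delivery was interrupted by a VM-exit (or by a
 * cancelled VM-entry): decode the IDT-vectoring / VM-entry interruption
 * information and push the NMI, exception or interrupt back onto the
 * vcpu's queues so it is re-injected on the next VM-entry.
 */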
6341 static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
6342 u32 idt_vectoring_info,
6343 int instr_len_field,
6344 int error_code_field)
6345 {
6346 u8 vector;
6347 int type;
6348 bool idtv_info_valid;
6349
6350 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
6351
6352 vcpu->arch.nmi_injected = false;
6353 kvm_clear_exception_queue(vcpu);
6354 kvm_clear_interrupt_queue(vcpu);
6355
6356 if (!idtv_info_valid)
6357 return;
6358
6359 kvm_make_request(KVM_REQ_EVENT, vcpu);
6360
6361 vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
6362 type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
6363
6364 switch (type) {
6365 case INTR_TYPE_NMI_INTR:
6366 vcpu->arch.nmi_injected = true;
6367 /*
6368  * SDM 3: 27.7.1.2 (September 2008)
6369  * Clear bit "block by NMI" before VM entry if a NMI
6370  * delivery faulted.
6371  */
6372 vmx_set_nmi_mask(vcpu, false);
6373 break;
6374 case INTR_TYPE_SOFT_EXCEPTION:
6375 vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
6376 /* fall through */
6377 case INTR_TYPE_HARD_EXCEPTION:
6378 if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
6379 u32 err = vmcs_read32(error_code_field);
6380 kvm_requeue_exception_e(vcpu, vector, err);
6381 } else
6382 kvm_requeue_exception(vcpu, vector);
6383 break;
6384 case INTR_TYPE_SOFT_INTR:
6385 vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
6386 /* fall through */
6387 case INTR_TYPE_EXT_INTR:
6388 kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR);
6389 break;
6390 default:
6391 break;
6392 }
6393 }
6394
6395 static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
6396 {
6397 __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info,
6398 VM_EXIT_INSTRUCTION_LEN,
6399 IDT_VECTORING_ERROR_CODE);
6400 }
6401
6402 static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
6403 {
6404 __vmx_complete_interrupts(vcpu,
6405 vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
6406 VM_ENTRY_INSTRUCTION_LEN,
6407 VM_ENTRY_EXCEPTION_ERROR_CODE);
6408
6409 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
6410 }
6411
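/*
 * Program the VM-entry/VM-exit MSR-load lists with the MSRs perf wants
 * switched between host and guest. Entries whose host and guest values
 * match are dropped from the lists so the hardware does no unnecessary
 * MSR writes around the world switch.
 */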
6412 static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
6413 {
6414 int i, nr_msrs;
6415 struct perf_guest_switch_msr *msrs;
6416
6417 msrs = perf_guest_get_msrs(&nr_msrs);
6418
6419 if (!msrs)
6420 return;
6421
6422 for (i = 0; i < nr_msrs; i++)
6423 if (msrs[i].host == msrs[i].guest)
6424 clear_atomic_switch_msr(vmx, msrs[i].msr);
6425 else
6426 add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest,
6427 msrs[i].host, false);
6428 }
6429
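/*
 * Same mechanism for MSR_IA32_UMWAIT_CONTROL when WAITPKG is exposed to
 * the guest: the atomic switch lists are used only when the guest and
 * host values actually differ.
 */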
6430 static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx)
6431 {
6432 u32 host_umwait_control;
6433
6434 if (!vmx_has_waitpkg(vmx))
6435 return;
6436
6437 host_umwait_control = get_umwait_control_msr();
6438
6439 if (vmx->msr_ia32_umwait_control != host_umwait_control)
6440 add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL,
6441 vmx->msr_ia32_umwait_control,
6442 host_umwait_control, false);
6443 else
6444 clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL);
6445 }
6446
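/*
 * Program the VMX-preemption timer for this VM-entry: zero when an
 * immediate exit was requested, the scaled TSC delta when an APIC-timer
 * deadline is armed, and the all-ones value otherwise so the always-on
 * timer effectively never fires ("soft disabled").
 */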
6447 static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
6448 {
6449 struct vcpu_vmx *vmx = to_vmx(vcpu);
6450 u64 tscl;
6451 u32 delta_tsc;
6452
6453 if (vmx->req_immediate_exit) {
6454 vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0);
6455 vmx->loaded_vmcs->hv_timer_soft_disabled = false;
6456 } else if (vmx->hv_deadline_tsc != -1) {
6457 tscl = rdtsc();
6458 if (vmx->hv_deadline_tsc > tscl)
6459 /* set_hv_timer ensures the delta fits in 32-bits */
6460 delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >>
6461 cpu_preemption_timer_multi);
6462 else
6463 delta_tsc = 0;
6464
6465 vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc);
6466 vmx->loaded_vmcs->hv_timer_soft_disabled = false;
6467 } else if (!vmx->loaded_vmcs->hv_timer_soft_disabled) {
6468 vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, -1);
6469 vmx->loaded_vmcs->hv_timer_soft_disabled = true;
6470 }
6471 }
6472
6473 void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
6474 {
6475 if (unlikely(host_rsp != vmx->loaded_vmcs->host_state.rsp)) {
6476 vmx->loaded_vmcs->host_state.rsp = host_rsp;
6477 vmcs_writel(HOST_RSP, host_rsp);
6478 }
6479 }
6480
6481 bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
6482
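/*
 * The innermost run loop: flush dirty VMCS state (guest RSP/RIP, host
 * CR3/CR4), apply the speculation mitigations (SPEC_CTRL, L1D flush /
 * MDS buffer clear), enter the guest via __vmx_vcpu_run(), then save the
 * exit state and restore host-side MSRs and segments.
 */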
6483 static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
6484 {
6485 struct vcpu_vmx *vmx = to_vmx(vcpu);
6486 unsigned long cr3, cr4;
6487
6488 /* Record the guest's net vcpu time for enforced NMI injections. */
6489 if (unlikely(!enable_vnmi &&
6490 vmx->loaded_vmcs->soft_vnmi_blocked))
6491 vmx->loaded_vmcs->entry_time = ktime_get();
6492
6493 /* Don't enter VMX if guest state is invalid, let the exit handler
6494    start emulation until we arrive back to a valid state. */
6495 if (vmx->emulation_required)
6496 return;
6497
6498 if (vmx->ple_window_dirty) {
6499 vmx->ple_window_dirty = false;
6500 vmcs_write32(PLE_WINDOW, vmx->ple_window);
6501 }
6502
6503 if (vmx->nested.need_vmcs12_to_shadow_sync)
6504 nested_sync_vmcs12_to_shadow(vcpu);
6505
6506 if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
6507 vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
6508 if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
6509 vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
6510
6511 cr3 = __get_current_cr3_fast();
6512 if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
6513 vmcs_writel(HOST_CR3, cr3);
6514 vmx->loaded_vmcs->host_state.cr3 = cr3;
6515 }
6516
6517 cr4 = cr4_read_shadow();
6518 if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
6519 vmcs_writel(HOST_CR4, cr4);
6520 vmx->loaded_vmcs->host_state.cr4 = cr4;
6521 }
6522
6523 /* When single-stepping over STI and MOV SS, we must clear the
6524  * corresponding interruptibility bits in the guest state. Otherwise
6525  * vmentry fails as it then expects bit 14 (BS) in pending debug
6526  * exceptions being set, but that's not correct for the guest debugging
6527  * case. */
6528 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
6529 vmx_set_interrupt_shadow(vcpu, 0);
6530
6531 kvm_load_guest_xcr0(vcpu);
6532
6533 pt_guest_enter(vmx);
6534
6535 atomic_switch_perf_msrs(vmx);
6536 atomic_switch_umwait_control_msr(vmx);
6537
6538 if (enable_preemption_timer)
6539 vmx_update_hv_timer(vcpu);
6540
6541 if (lapic_in_kernel(vcpu) &&
6542 vcpu->arch.apic->lapic_timer.timer_advance_ns)
6543 kvm_wait_lapic_expire(vcpu);
6544
6545 /*
6546  * If this vCPU has touched SPEC_CTRL, restore the guest's value if
6547  * it's non-zero. Since vmentry is serialising on affected CPUs, there
6548  * is no need to worry about the conditional branch over the wrmsr
6549  * being speculatively taken.
6550  */
6551 x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
6552
6553 /* L1D Flush includes CPU buffer clear to mitigate MDS */
6554 if (static_branch_unlikely(&vmx_l1d_should_flush))
6555 vmx_l1d_flush(vcpu);
6556 else if (static_branch_unlikely(&mds_user_clear))
6557 mds_clear_cpu_buffers();
6558
6559 if (vcpu->arch.cr2 != read_cr2())
6560 write_cr2(vcpu->arch.cr2);
6561
6562 vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
6563 vmx->loaded_vmcs->launched);
6564
6565 vcpu->arch.cr2 = read_cr2();
6566
6567 /*
6568  * We do not use IBRS in the kernel. If this vCPU has used the
6569  * SPEC_CTRL MSR it may have left it on; save the value and
6570  * turn it off. This is much more efficient than blindly adding
6571  * it to the atomic save/restore list. Especially as the former
6572  * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
6573  *
6574  * For non-nested case:
6575  * If the L01 MSR bitmap does not intercept the MSR, then we need to
6576  * save it.
6577  *
6578  * For nested case:
6579  * If the L02 MSR bitmap does not intercept the MSR, then we need to
6580  * save it.
6581  */
6582 if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
6583 vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
6584
6585 x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
6586
6587 /* All fields are clean at this point */
6588 if (static_branch_unlikely(&enable_evmcs))
6589 current_evmcs->hv_clean_fields |=
6590 HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
6591
6592 if (static_branch_unlikely(&enable_evmcs))
6593 current_evmcs->hv_vp_id = vcpu->arch.hyperv.vp_index;
6594
6595 /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
6596 if (vmx->host_debugctlmsr)
6597 update_debugctlmsr(vmx->host_debugctlmsr);
6598
6599 #ifndef CONFIG_X86_64
6600 /*
6601  * The sysexit path does not restore ds/es, so we must set them to
6602  * a reasonable value ourselves.
6603  *
6604  * We can't defer this to vmx_prepare_switch_to_host() since that
6605  * function may be executed in interrupt context, which saves and
6606  * restores segments around it, nullifying its effect.
6607  */
6608 loadsegment(ds, __USER_DS);
6609 loadsegment(es, __USER_DS);
6610 #endif
6611
6612 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
6613 | (1 << VCPU_EXREG_RFLAGS)
6614 | (1 << VCPU_EXREG_PDPTR)
6615 | (1 << VCPU_EXREG_SEGMENTS)
6616 | (1 << VCPU_EXREG_CR3));
6617 vcpu->arch.regs_dirty = 0;
6618
6619 pt_guest_exit(vmx);
6620
6621 kvm_put_guest_xcr0(vcpu);
6622
6623 vmx->nested.nested_run_pending = 0;
6624 vmx->idt_vectoring_info = 0;
6625
6626 vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON);
6627 if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
6628 kvm_machine_check();
6629
6630 if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
6631 return;
6632
6633 vmx->loaded_vmcs->launched = 1;
6634 vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
6635
6636 vmx_recover_nmi_blocking(vmx);
6637 vmx_complete_interrupts(vmx);
6638 }
6639
6640 static struct kvm *vmx_vm_alloc(void)
6641 {
6642 struct kvm_vmx *kvm_vmx = __vmalloc(sizeof(struct kvm_vmx),
6643 GFP_KERNEL_ACCOUNT | __GFP_ZERO,
6644 PAGE_KERNEL);
6645
6646 if (!kvm_vmx)
6647 return NULL;
6648
6649 return &kvm_vmx->kvm;
6650 }
6651
6652 static void vmx_vm_free(struct kvm *kvm)
6653 {
6654 kfree(kvm->arch.hyperv.hv_pa_pg);
6655 vfree(to_kvm_vmx(kvm));
6656 }
6657
6658 static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
6659 {
6660 struct vcpu_vmx *vmx = to_vmx(vcpu);
6661
6662 if (enable_pml)
6663 vmx_destroy_pml_buffer(vmx);
6664 free_vpid(vmx->vpid);
6665 nested_vmx_free_vcpu(vcpu);
6666 free_loaded_vmcs(vmx->loaded_vmcs);
6667 kfree(vmx->guest_msrs);
6668 kvm_vcpu_uninit(vcpu);
6669 kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
6670 kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
6671 kmem_cache_free(kvm_vcpu_cache, vmx);
6672 }
6673
6674 static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
6675 {
6676 int err;
6677 struct vcpu_vmx *vmx;
6678 unsigned long *msr_bitmap;
6679 int cpu;
6680
6681 BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0,
6682 "struct kvm_vcpu must be at offset 0 for arch usercopy region");
6683
6684 vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
6685 if (!vmx)
6686 return ERR_PTR(-ENOMEM);
6687
6688 vmx->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
6689 GFP_KERNEL_ACCOUNT);
6690 if (!vmx->vcpu.arch.user_fpu) {
6691 printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n");
6692 err = -ENOMEM;
6693 goto free_partial_vcpu;
6694 }
6695
6696 vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
6697 GFP_KERNEL_ACCOUNT);
6698 if (!vmx->vcpu.arch.guest_fpu) {
6699 printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
6700 err = -ENOMEM;
6701 goto free_user_fpu;
6702 }
6703
6704 vmx->vpid = allocate_vpid();
6705
6706 err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
6707 if (err)
6708 goto free_vcpu;
6709
6710 err = -ENOMEM;
6711
6712 /*
6713  * When PML is enabled, a dedicated page-modification log page is
6714  * allocated for this vCPU. Failure to allocate it aborts vCPU
6715  * creation rather than silently running the vCPU without PML.
6716  */
6717 
6718 if (enable_pml) {
6719 vmx->pml_pg = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
6720 if (!vmx->pml_pg)
6721 goto uninit_vcpu;
6722 }
6723
6724 vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT);
6725 BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0])
6726 > PAGE_SIZE);
6727
6728 if (!vmx->guest_msrs)
6729 goto free_pml;
6730
6731 err = alloc_loaded_vmcs(&vmx->vmcs01);
6732 if (err < 0)
6733 goto free_msrs;
6734
6735 msr_bitmap = vmx->vmcs01.msr_bitmap;
6736 vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_TSC, MSR_TYPE_R);
6737 vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
6738 vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
6739 vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
6740 vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
6741 vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
6742 vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
6743 if (kvm_cstate_in_guest(kvm)) {
6744 vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R);
6745 vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
6746 vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
6747 vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
6748 }
6749 vmx->msr_bitmap_mode = 0;
6750
6751 vmx->loaded_vmcs = &vmx->vmcs01;
6752 cpu = get_cpu();
6753 vmx_vcpu_load(&vmx->vcpu, cpu);
6754 vmx->vcpu.cpu = cpu;
6755 vmx_vcpu_setup(vmx);
6756 vmx_vcpu_put(&vmx->vcpu);
6757 put_cpu();
6758 if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
6759 err = alloc_apic_access_page(kvm);
6760 if (err)
6761 goto free_vmcs;
6762 }
6763
6764 if (enable_ept && !enable_unrestricted_guest) {
6765 err = init_rmode_identity_map(kvm);
6766 if (err)
6767 goto free_vmcs;
6768 }
6769
6770 if (nested)
6771 nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
6772 vmx_capability.ept);
6773 else
6774 memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs));
6775
6776 vmx->nested.posted_intr_nv = -1;
6777 vmx->nested.current_vmptr = -1ull;
6778
6779 vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
6780
6781 /*
6782  * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
6783  * or POSTED_INTR_WAKEUP_VECTOR.
6784  */
6785 vmx->pi_desc.nv = POSTED_INTR_VECTOR;
6786 vmx->pi_desc.sn = 1;
6787
6788 vmx->ept_pointer = INVALID_PAGE;
6789
6790 return &vmx->vcpu;
6791
6792 free_vmcs:
6793 free_loaded_vmcs(vmx->loaded_vmcs);
6794 free_msrs:
6795 kfree(vmx->guest_msrs);
6796 free_pml:
6797 vmx_destroy_pml_buffer(vmx);
6798 uninit_vcpu:
6799 kvm_vcpu_uninit(&vmx->vcpu);
6800 free_vcpu:
6801 free_vpid(vmx->vpid);
6802 kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
6803 free_user_fpu:
6804 kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
6805 free_partial_vcpu:
6806 kmem_cache_free(kvm_vcpu_cache, vmx);
6807 return ERR_PTR(err);
6808 }
6809
6810 #define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
6811 #define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
6812
6813 static int vmx_vm_init(struct kvm *kvm)
6814 {
6815 spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock);
6816
6817 if (!ple_gap)
6818 kvm->arch.pause_in_guest = true;
6819
6820 if (boot_cpu_has(X86_BUG_L1TF) && enable_ept) {
6821 switch (l1tf_mitigation) {
6822 case L1TF_MITIGATION_OFF:
6823 case L1TF_MITIGATION_FLUSH_NOWARN:
6824 /* 'I explicitly don't care' is set */
6825 break;
6826 case L1TF_MITIGATION_FLUSH:
6827 case L1TF_MITIGATION_FLUSH_NOSMT:
6828 case L1TF_MITIGATION_FULL:
6829 /*
6830  * Warn upon starting the first VM in a potentially
6831  * insecure environment.
6832  */
6833 if (sched_smt_active())
6834 pr_warn_once(L1TF_MSG_SMT);
6835 if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER)
6836 pr_warn_once(L1TF_MSG_L1D);
6837 break;
6838 case L1TF_MITIGATION_FULL_FORCE:
6839 /* Flush is enforced */
6840 break;
6841 }
6842 }
6843 return 0;
6844 }
6845
6846 static int __init vmx_check_processor_compat(void)
6847 {
6848 struct vmcs_config vmcs_conf;
6849 struct vmx_capability vmx_cap;
6850
6851 if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0)
6852 return -EIO;
6853 if (nested)
6854 nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept);
6855 if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
6856 printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
6857 smp_processor_id());
6858 return -EIO;
6859 }
6860 return 0;
6861 }
6862
6863 static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
6864 {
6865 u8 cache;
6866 u64 ipat = 0;
6867
6868 /* For VT-d and EPT combination
6869  * 1. MMIO: always map as UC
6870  * 2. EPT with VT-d:
6871  *   a. VT-d without snooping control feature: can't guarantee the
6872  *      result, try to trust guest.
6873  *   b. VT-d with snooping control feature: snooping control feature of
6874  *      VT-d engine can guarantee the cache correctness. Just set it
6875  *      to WB to keep consistent with host. So the same as item 3.
6876  * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
6877  *    consistent with host MTRR
6878  */
6879 if (is_mmio) {
6880 cache = MTRR_TYPE_UNCACHABLE;
6881 goto exit;
6882 }
6883
6884 if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
6885 ipat = VMX_EPT_IPAT_BIT;
6886 cache = MTRR_TYPE_WRBACK;
6887 goto exit;
6888 }
6889
6890 if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
6891 ipat = VMX_EPT_IPAT_BIT;
6892 if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
6893 cache = MTRR_TYPE_WRBACK;
6894 else
6895 cache = MTRR_TYPE_UNCACHABLE;
6896 goto exit;
6897 }
6898
6899 cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
6900
6901 exit:
6902 return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat;
6903 }
6904
6905 static int vmx_get_lpage_level(void)
6906 {
6907 if (enable_ept && !cpu_has_vmx_ept_1g_page())
6908 return PT_DIRECTORY_LEVEL;
6909 else
6910
6911 return PT_PDPE_LEVEL;
6912 }
6913
6914 static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx)
6915 {
6916 /*
6917  * These bits in the secondary execution controls field
6918  * are dynamic, the others are mostly based on the hypervisor
6919  * architecture and the guest's CPUID. Do not touch the
6920  * dynamic bits.
6921  */
6922 u32 mask =
6923 SECONDARY_EXEC_SHADOW_VMCS |
6924 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
6925 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
6926 SECONDARY_EXEC_DESC;
6927
6928 u32 new_ctl = vmx->secondary_exec_control;
6929 u32 cur_ctl = secondary_exec_controls_get(vmx);
6930
6931 secondary_exec_controls_set(vmx, (new_ctl & ~mask) | (cur_ctl & mask));
6932 }
6933
6934 /*
6935  * Recompute the nested MSR_IA32_VMX_CR{0,4}_FIXED1 values (the CR0/CR4
6936  * bits the L1 guest may set to 1) from the features in the guest's CPUID.
6937  */
6938 static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
6939 {
6940 struct vcpu_vmx *vmx = to_vmx(vcpu);
6941 struct kvm_cpuid_entry2 *entry;
6942
6943 vmx->nested.msrs.cr0_fixed1 = 0xffffffff;
6944 vmx->nested.msrs.cr4_fixed1 = X86_CR4_PCE;
6945
6946 #define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do { \
6947 if (entry && (entry->_reg & (_cpuid_mask))) \
6948 vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask); \
6949 } while (0)
6950
6951 entry = kvm_find_cpuid_entry(vcpu, 0x1, 0);
6952 cr4_fixed1_update(X86_CR4_VME, edx, bit(X86_FEATURE_VME));
6953 cr4_fixed1_update(X86_CR4_PVI, edx, bit(X86_FEATURE_VME));
6954 cr4_fixed1_update(X86_CR4_TSD, edx, bit(X86_FEATURE_TSC));
6955 cr4_fixed1_update(X86_CR4_DE, edx, bit(X86_FEATURE_DE));
6956 cr4_fixed1_update(X86_CR4_PSE, edx, bit(X86_FEATURE_PSE));
6957 cr4_fixed1_update(X86_CR4_PAE, edx, bit(X86_FEATURE_PAE));
6958 cr4_fixed1_update(X86_CR4_MCE, edx, bit(X86_FEATURE_MCE));
6959 cr4_fixed1_update(X86_CR4_PGE, edx, bit(X86_FEATURE_PGE));
6960 cr4_fixed1_update(X86_CR4_OSFXSR, edx, bit(X86_FEATURE_FXSR));
6961 cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, bit(X86_FEATURE_XMM));
6962 cr4_fixed1_update(X86_CR4_VMXE, ecx, bit(X86_FEATURE_VMX));
6963 cr4_fixed1_update(X86_CR4_SMXE, ecx, bit(X86_FEATURE_SMX));
6964 cr4_fixed1_update(X86_CR4_PCIDE, ecx, bit(X86_FEATURE_PCID));
6965 cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, bit(X86_FEATURE_XSAVE));
6966
6967 entry = kvm_find_cpuid_entry(vcpu, 0x7, 0);
6968 cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, bit(X86_FEATURE_FSGSBASE));
6969 cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP));
6970 cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP));
6971 cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU));
6972 cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP));
6973
6974 #undef cr4_fixed1_update
6975 }
6976
6977 static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
6978 {
6979 struct vcpu_vmx *vmx = to_vmx(vcpu);
6980
6981 if (kvm_mpx_supported()) {
6982 bool mpx_enabled = guest_cpuid_has(vcpu, X86_FEATURE_MPX);
6983
6984 if (mpx_enabled) {
6985 vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
6986 vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
6987 } else {
6988 vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS;
6989 vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS;
6990 }
6991 }
6992 }
6993
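/*
 * Cache the Intel PT capabilities (CPUID leaf 0x14) exposed to the guest
 * and derive pt_desc.ctl_bitmask, the set of RTIT_CTL bits the guest is
 * not allowed to set given those capabilities.
 */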
6994 static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
6995 {
6996 struct vcpu_vmx *vmx = to_vmx(vcpu);
6997 struct kvm_cpuid_entry2 *best = NULL;
6998 int i;
6999
7000 for (i = 0; i < PT_CPUID_LEAVES; i++) {
7001 best = kvm_find_cpuid_entry(vcpu, 0x14, i);
7002 if (!best)
7003 return;
7004 vmx->pt_desc.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM] = best->eax;
7005 vmx->pt_desc.caps[CPUID_EBX + i*PT_CPUID_REGS_NUM] = best->ebx;
7006 vmx->pt_desc.caps[CPUID_ECX + i*PT_CPUID_REGS_NUM] = best->ecx;
7007 vmx->pt_desc.caps[CPUID_EDX + i*PT_CPUID_REGS_NUM] = best->edx;
7008 }
7009
7010
7011 vmx->pt_desc.addr_range = intel_pt_validate_cap(vmx->pt_desc.caps,
7012 PT_CAP_num_address_ranges);
7013
7014
7015 vmx->pt_desc.ctl_bitmask = ~(RTIT_CTL_TRACEEN | RTIT_CTL_OS |
7016 RTIT_CTL_USR | RTIT_CTL_TSC_EN | RTIT_CTL_DISRETC);
7017
7018
7019
7020
7021
7022 if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_cr3_filtering))
7023 vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_CR3EN;
7024
7025
7026
7027
7028
7029 if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc))
7030 vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_CYCLEACC |
7031 RTIT_CTL_CYC_THRESH | RTIT_CTL_PSB_FREQ);
7032
7033
7034
7035
7036
7037 if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc))
7038 vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_MTC_EN |
7039 RTIT_CTL_BRANCH_EN | RTIT_CTL_MTC_RANGE);
7040
7041
7042 if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_ptwrite))
7043 vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_FUP_ON_PTW |
7044 RTIT_CTL_PTW_EN);
7045
7046
7047 if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_power_event_trace))
7048 vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_PWR_EVT_EN;
7049
7050
7051 if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_topa_output))
7052 vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_TOPA;
7053
7054
7055 if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_output_subsys))
7056 vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_FABRIC_EN;
7057
7058
7059 for (i = 0; i < vmx->pt_desc.addr_range; i++)
7060 vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4));
7061 }
7062
7063 static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
7064 {
7065 struct vcpu_vmx *vmx = to_vmx(vcpu);
7066
7067 if (cpu_has_secondary_exec_ctrls()) {
7068 vmx_compute_secondary_exec_control(vmx);
7069 vmcs_set_secondary_exec_control(vmx);
7070 }
7071
7072 if (nested_vmx_allowed(vcpu))
7073 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
7074 FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
7075 else
7076 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
7077 ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
7078
7079 if (nested_vmx_allowed(vcpu)) {
7080 nested_vmx_cr_fixed1_bits_update(vcpu);
7081 nested_vmx_entry_exit_ctls_update(vcpu);
7082 }
7083
7084 if (boot_cpu_has(X86_FEATURE_INTEL_PT) &&
7085 guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT))
7086 update_intel_pt_cfg(vcpu);
7087 }
7088
7089 static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
7090 {
7091 if (func == 1 && nested)
7092 entry->ecx |= bit(X86_FEATURE_VMX);
7093 }
7094
7095 static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
7096 {
7097 to_vmx(vcpu)->req_immediate_exit = true;
7098 }
7099
7100 static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
7101 struct x86_instruction_info *info)
7102 {
7103 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
7104 unsigned short port;
7105 bool intercept;
7106 int size;
7107
7108 if (info->intercept == x86_intercept_in ||
7109 info->intercept == x86_intercept_ins) {
7110 port = info->src_val;
7111 size = info->dst_bytes;
7112 } else {
7113 port = info->dst_val;
7114 size = info->src_bytes;
7115 }
7116 /*
7117  * If the 'use IO bitmaps' VM-execution control is 0, IO instruction
7118  * VM-exits depend on the 'unconditional IO exiting' VM-execution
7119  * control.
7120  *
7121  * Otherwise, IO instruction VM-exits are controlled by the IO bitmaps.
7122  */
7123
7124 if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
7125 intercept = nested_cpu_has(vmcs12,
7126 CPU_BASED_UNCOND_IO_EXITING);
7127 else
7128 intercept = nested_vmx_check_io_bitmaps(vcpu, port, size);
7129
7130
7131 return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
7132 }
7133
7134 static int vmx_check_intercept(struct kvm_vcpu *vcpu,
7135 struct x86_instruction_info *info,
7136 enum x86_intercept_stage stage)
7137 {
7138 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
7139 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
7140
7141 switch (info->intercept) {
7142 /*
7143  * RDPID causes #UD if disabled through secondary execution controls.
7144  * Because it is marked as EmulateOnUD, we need to intercept it here.
7145  */
7146 case x86_intercept_rdtscp:
7147 if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
7148 ctxt->exception.vector = UD_VECTOR;
7149 ctxt->exception.error_code_valid = false;
7150 return X86EMUL_PROPAGATE_FAULT;
7151 }
7152 break;
7153
7154 case x86_intercept_in:
7155 case x86_intercept_ins:
7156 case x86_intercept_out:
7157 case x86_intercept_outs:
7158 return vmx_check_intercept_io(vcpu, info);
7159
7160 case x86_intercept_lgdt:
7161 case x86_intercept_lidt:
7162 case x86_intercept_lldt:
7163 case x86_intercept_ltr:
7164 case x86_intercept_sgdt:
7165 case x86_intercept_sidt:
7166 case x86_intercept_sldt:
7167 case x86_intercept_str:
7168 if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC))
7169 return X86EMUL_CONTINUE;
7170
7171
7172 break;
7173
7174
7175 default:
7176 break;
7177 }
7178
7179 return X86EMUL_UNHANDLEABLE;
7180 }
7181
7182 #ifdef CONFIG_X86_64
7183
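/*
 * Compute (a << shift) / divisor using DIVQ's 128-bit dividend
 * (high:low). Returns 1 if the quotient would not fit in 64 bits,
 * 0 on success with the quotient stored in *result.
 */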
7184 static inline int u64_shl_div_u64(u64 a, unsigned int shift,
7185 u64 divisor, u64 *result)
7186 {
7187 u64 low = a << shift, high = a >> (64 - shift);
7188
7189
7190 if (high >= divisor)
7191 return 1;
7192
7193
7194 asm("divq %2\n\t" : "=a" (low), "=d" (high) :
7195 "rm" (divisor), "0" (low), "1" (high));
7196 *result = low;
7197
7198 return 0;
7199 }
7200
7201 static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
7202 bool *expired)
7203 {
7204 struct vcpu_vmx *vmx;
7205 u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
7206 struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer;
7207
7208 if (kvm_mwait_in_guest(vcpu->kvm) ||
7209 kvm_can_post_timer_interrupt(vcpu))
7210 return -EOPNOTSUPP;
7211
7212 vmx = to_vmx(vcpu);
7213 tscl = rdtsc();
7214 guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
7215 delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
7216 lapic_timer_advance_cycles = nsec_to_cycles(vcpu,
7217 ktimer->timer_advance_ns);
7218
7219 if (delta_tsc > lapic_timer_advance_cycles)
7220 delta_tsc -= lapic_timer_advance_cycles;
7221 else
7222 delta_tsc = 0;
7223
7224 /* Convert to host delta tsc if tsc scaling is enabled */
7225 if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
7226 delta_tsc && u64_shl_div_u64(delta_tsc,
7227 kvm_tsc_scaling_ratio_frac_bits,
7228 vcpu->arch.tsc_scaling_ratio, &delta_tsc))
7229 return -ERANGE;
7230
7231 /*
7232  * If the delta tsc can't fit in the 32 bit after the multi shift,
7233  * we can't use the preemption timer.
7234  * It's possible that it fits on later vmentries, but checking
7235  * on every vmentry is costly so we just use an hrtimer.
7236  */
7237 if (delta_tsc >> (cpu_preemption_timer_multi + 32))
7238 return -ERANGE;
7239
7240 vmx->hv_deadline_tsc = tscl + delta_tsc;
7241 *expired = !delta_tsc;
7242 return 0;
7243 }
7244
7245 static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
7246 {
7247 to_vmx(vcpu)->hv_deadline_tsc = -1;
7248 }
7249 #endif
7250
7251 static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
7252 {
7253 if (!kvm_pause_in_guest(vcpu->kvm))
7254 shrink_ple_window(vcpu);
7255 }
7256
7257 static void vmx_slot_enable_log_dirty(struct kvm *kvm,
7258 struct kvm_memory_slot *slot)
7259 {
7260 kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
7261 kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
7262 }
7263
7264 static void vmx_slot_disable_log_dirty(struct kvm *kvm,
7265 struct kvm_memory_slot *slot)
7266 {
7267 kvm_mmu_slot_set_dirty(kvm, slot);
7268 }
7269
7270 static void vmx_flush_log_dirty(struct kvm *kvm)
7271 {
7272 kvm_flush_pml_buffers(kvm);
7273 }
7274
7275 static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
7276 {
7277 struct vmcs12 *vmcs12;
7278 struct vcpu_vmx *vmx = to_vmx(vcpu);
7279 gpa_t gpa, dst;
7280
7281 if (is_guest_mode(vcpu)) {
7282 WARN_ON_ONCE(vmx->nested.pml_full);
7283
7284 /*
7285  * Check if PML is enabled for the nested guest.
7286  * Whether eptp bit 6 is set is already checked
7287  * as part of A/D emulation.
7288  */
7289 vmcs12 = get_vmcs12(vcpu);
7290 if (!nested_cpu_has_pml(vmcs12))
7291 return 0;
7292
7293 if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) {
7294 vmx->nested.pml_full = true;
7295 return 1;
7296 }
7297
7298 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull;
7299 dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index;
7300
7301 if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa,
7302 offset_in_page(dst), sizeof(gpa)))
7303 return 0;
7304
7305 vmcs12->guest_pml_index--;
7306 }
7307
7308 return 0;
7309 }
7310
7311 static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
7312 struct kvm_memory_slot *memslot,
7313 gfn_t offset, unsigned long mask)
7314 {
7315 kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
7316 }
7317
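/*
 * Undo the blocking-time posted-interrupt setup: point NDST at the CPU
 * the vCPU is now running on, switch NV back to POSTED_INTR_VECTOR and
 * take the vCPU off the per-CPU wakeup list. Called with IRQs disabled.
 */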
7318 static void __pi_post_block(struct kvm_vcpu *vcpu)
7319 {
7320 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
7321 struct pi_desc old, new;
7322 unsigned int dest;
7323
7324 do {
7325 old.control = new.control = pi_desc->control;
7326 WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
7327 "Wakeup handler not enabled while the VCPU is blocked\n");
7328
7329 dest = cpu_physical_id(vcpu->cpu);
7330
7331 if (x2apic_enabled())
7332 new.ndst = dest;
7333 else
7334 new.ndst = (dest << 8) & 0xFF00;
7335
7336
7337 new.nv = POSTED_INTR_VECTOR;
7338 } while (cmpxchg64(&pi_desc->control, old.control,
7339 new.control) != old.control);
7340
7341 if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
7342 spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
7343 list_del(&vcpu->blocked_vcpu_list);
7344 spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
7345 vcpu->pre_pcpu = -1;
7346 }
7347 }
7348
7349 /*
7350  * This routine does the following things for vCPU which is going
7351  * to be blocked if VT-d PI is enabled.
7352  * - Store the vCPU to the wakeup list, so when interrupts happen
7353  *   we can find the right vCPU to wake up.
7354  * - Change the Posted-interrupt descriptor as below:
7355  *      'NDST' <-- vcpu->pre_pcpu
7356  *      'NV' <-- POSTED_INTR_WAKEUP_VECTOR
7357  * - If 'ON' is set during this process, which means at least one
7358  *   interrupt is posted for this vCPU, we cannot block it, in
7359  *   this case, return 1, otherwise, return 0.
7360  *
7361  */
7362 static int pi_pre_block(struct kvm_vcpu *vcpu)
7363 {
7364 unsigned int dest;
7365 struct pi_desc old, new;
7366 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
7367
7368 if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
7369 !irq_remapping_cap(IRQ_POSTING_CAP) ||
7370 !kvm_vcpu_apicv_active(vcpu))
7371 return 0;
7372
7373 WARN_ON(irqs_disabled());
7374 local_irq_disable();
7375 if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
7376 vcpu->pre_pcpu = vcpu->cpu;
7377 spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
7378 list_add_tail(&vcpu->blocked_vcpu_list,
7379 &per_cpu(blocked_vcpu_on_cpu,
7380 vcpu->pre_pcpu));
7381 spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
7382 }
7383
7384 do {
7385 old.control = new.control = pi_desc->control;
7386
7387 WARN((pi_desc->sn == 1),
7388 "Warning: SN field of posted-interrupts "
7389 "is set before blocking\n");
7390
7391 /*
7392  * Since vCPU can be preempted during this process,
7393  * vcpu->cpu could be different with pre_pcpu, we
7394  * need to set pre_pcpu as the destination of wakeup
7395  * notification event, then we can find the right vCPU
7396  * to wake up in the wakeup handler if interrupts happen
7397  * when the vCPU is in blocked state.
7398  */
7399 dest = cpu_physical_id(vcpu->pre_pcpu);
7400
7401 if (x2apic_enabled())
7402 new.ndst = dest;
7403 else
7404 new.ndst = (dest << 8) & 0xFF00;
7405
7406
7407 new.nv = POSTED_INTR_WAKEUP_VECTOR;
7408 } while (cmpxchg64(&pi_desc->control, old.control,
7409 new.control) != old.control);
7410
7411 /* We should not block the vCPU if an interrupt is posted for it. */
7412 if (pi_test_on(pi_desc) == 1)
7413 __pi_post_block(vcpu);
7414
7415 local_irq_enable();
7416 return (vcpu->pre_pcpu == -1);
7417 }
7418
7419 static int vmx_pre_block(struct kvm_vcpu *vcpu)
7420 {
7421 if (pi_pre_block(vcpu))
7422 return 1;
7423
7424 if (kvm_lapic_hv_timer_in_use(vcpu))
7425 kvm_lapic_switch_to_sw_timer(vcpu);
7426
7427 return 0;
7428 }
7429
7430 static void pi_post_block(struct kvm_vcpu *vcpu)
7431 {
7432 if (vcpu->pre_pcpu == -1)
7433 return;
7434
7435 WARN_ON(irqs_disabled());
7436 local_irq_disable();
7437 __pi_post_block(vcpu);
7438 local_irq_enable();
7439 }
7440
7441 static void vmx_post_block(struct kvm_vcpu *vcpu)
7442 {
7443 if (kvm_x86_ops->set_hv_timer)
7444 kvm_lapic_switch_to_hv_timer(vcpu);
7445
7446 pi_post_block(vcpu);
7447 }
7448
7449 /*
7450  * vmx_update_pi_irte - set IRTE for Posted-Interrupts
7451  *
7452  * @kvm: kvm
7453  * @host_irq: host irq of the interrupt
7454  * @guest_irq: gsi of the interrupt
7455  * @set: set or unset PI
7456  * returns 0 on success, < 0 on failure
7457  */
7458 static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
7459 uint32_t guest_irq, bool set)
7460 {
7461 struct kvm_kernel_irq_routing_entry *e;
7462 struct kvm_irq_routing_table *irq_rt;
7463 struct kvm_lapic_irq irq;
7464 struct kvm_vcpu *vcpu;
7465 struct vcpu_data vcpu_info;
7466 int idx, ret = 0;
7467
7468 if (!kvm_arch_has_assigned_device(kvm) ||
7469 !irq_remapping_cap(IRQ_POSTING_CAP) ||
7470 !kvm_vcpu_apicv_active(kvm->vcpus[0]))
7471 return 0;
7472
7473 idx = srcu_read_lock(&kvm->irq_srcu);
7474 irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
7475 if (guest_irq >= irq_rt->nr_rt_entries ||
7476 hlist_empty(&irq_rt->map[guest_irq])) {
7477 pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
7478 guest_irq, irq_rt->nr_rt_entries);
7479 goto out;
7480 }
7481
7482 hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
7483 if (e->type != KVM_IRQ_ROUTING_MSI)
7484 continue;
7485
7486 /*
7487  * VT-d PI cannot support posting multicast/broadcast
7488  * interrupts to a vCPU, we still use interrupt remapping
7489  * for these kind of interrupts.
7490  *
7491  * For lowest-priority interrupts, we only support
7492  * those with single CPU as the destination, e.g. user
7493  * configures the interrupts via /proc/irq or uses
7494  * irqbalance to make the interrupts single-CPU.
7495  *
7496  * We will support full lowest-priority interrupt later.
7497  *
7498  * In addition, we can only inject generic interrupts using
7499  * the PI mechanism, refuse to route others through it.
7500  */
7501 kvm_set_msi_irq(kvm, e, &irq);
7502 if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
7503 !kvm_irq_is_postable(&irq)) {
7504 /*
7505  * Make sure the IRTE is in remapped mode if
7506  * we don't handle it in posted mode.
7507  */
7508 ret = irq_set_vcpu_affinity(host_irq, NULL);
7509 if (ret < 0) {
7510 printk(KERN_INFO
7511 "failed to back to remapped mode, irq: %u\n",
7512 host_irq);
7513 goto out;
7514 }
7515
7516 continue;
7517 }
7518
7519 vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
7520 vcpu_info.vector = irq.vector;
7521
7522 trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi,
7523 vcpu_info.vector, vcpu_info.pi_desc_addr, set);
7524
7525 if (set)
7526 ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
7527 else
7528 ret = irq_set_vcpu_affinity(host_irq, NULL);
7529
7530 if (ret < 0) {
7531 printk(KERN_INFO "%s: failed to update PI IRTE\n",
7532 __func__);
7533 goto out;
7534 }
7535 }
7536
7537 ret = 0;
7538 out:
7539 srcu_read_unlock(&kvm->irq_srcu, idx);
7540 return ret;
7541 }
7542
7543 static void vmx_setup_mce(struct kvm_vcpu *vcpu)
7544 {
7545 if (vcpu->arch.mcg_cap & MCG_LMCE_P)
7546 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
7547 FEATURE_CONTROL_LMCE;
7548 else
7549 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
7550 ~FEATURE_CONTROL_LMCE;
7551 }
7552
7553 static int vmx_smi_allowed(struct kvm_vcpu *vcpu)
7554 {
7555
7556 if (to_vmx(vcpu)->nested.nested_run_pending)
7557 return 0;
7558 return 1;
7559 }
7560
7561 static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
7562 {
7563 struct vcpu_vmx *vmx = to_vmx(vcpu);
7564
7565 vmx->nested.smm.guest_mode = is_guest_mode(vcpu);
7566 if (vmx->nested.smm.guest_mode)
7567 nested_vmx_vmexit(vcpu, -1, 0, 0);
7568
7569 vmx->nested.smm.vmxon = vmx->nested.vmxon;
7570 vmx->nested.vmxon = false;
7571 vmx_clear_hlt(vcpu);
7572 return 0;
7573 }
7574
7575 static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
7576 {
7577 struct vcpu_vmx *vmx = to_vmx(vcpu);
7578 int ret;
7579
7580 if (vmx->nested.smm.vmxon) {
7581 vmx->nested.vmxon = true;
7582 vmx->nested.smm.vmxon = false;
7583 }
7584
7585 if (vmx->nested.smm.guest_mode) {
7586 ret = nested_vmx_enter_non_root_mode(vcpu, false);
7587 if (ret)
7588 return ret;
7589
7590 vmx->nested.smm.guest_mode = false;
7591 }
7592 return 0;
7593 }
7594
7595 static int enable_smi_window(struct kvm_vcpu *vcpu)
7596 {
7597 return 0;
7598 }
7599
7600 static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
7601 {
7602 return false;
7603 }
7604
7605 static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
7606 {
7607 return to_vmx(vcpu)->nested.vmxon;
7608 }
7609
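/*
 * Module-load time probing: read the VMX capability MSRs via
 * setup_vmcs_config(), clear the module parameters for features the
 * hardware cannot support (VPID, EPT, PML, APICv, the preemption timer,
 * ...) and patch kvm_x86_ops accordingly.
 */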
7610 static __init int hardware_setup(void)
7611 {
7612 unsigned long host_bndcfgs;
7613 struct desc_ptr dt;
7614 int r, i;
7615
7616 rdmsrl_safe(MSR_EFER, &host_efer);
7617
7618 store_idt(&dt);
7619 host_idt_base = dt.address;
7620
7621 for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
7622 kvm_define_shared_msr(i, vmx_msr_index[i]);
7623
7624 if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)
7625 return -EIO;
7626
7627 if (boot_cpu_has(X86_FEATURE_NX))
7628 kvm_enable_efer_bits(EFER_NX);
7629
7630 if (boot_cpu_has(X86_FEATURE_MPX)) {
7631 rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs);
7632 WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost");
7633 }
7634
7635 if (boot_cpu_has(X86_FEATURE_XSAVES))
7636 rdmsrl(MSR_IA32_XSS, host_xss);
7637
7638 if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
7639 !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
7640 enable_vpid = 0;
7641
7642 if (!cpu_has_vmx_ept() ||
7643 !cpu_has_vmx_ept_4levels() ||
7644 !cpu_has_vmx_ept_mt_wb() ||
7645 !cpu_has_vmx_invept_global())
7646 enable_ept = 0;
7647
7648 if (!cpu_has_vmx_ept_ad_bits() || !enable_ept)
7649 enable_ept_ad_bits = 0;
7650
7651 if (!cpu_has_vmx_unrestricted_guest() || !enable_ept)
7652 enable_unrestricted_guest = 0;
7653
7654 if (!cpu_has_vmx_flexpriority())
7655 flexpriority_enabled = 0;
7656
7657 if (!cpu_has_virtual_nmis())
7658 enable_vnmi = 0;
7659
7660 /*
7661  * set_apic_access_page_addr() is used to reload apic access
7662  * page upon invalidation. No need to do anything if not
7663  * using the APIC_ACCESS_ADDR VMCS field.
7664  */
7665 if (!flexpriority_enabled)
7666 kvm_x86_ops->set_apic_access_page_addr = NULL;
7667
7668 if (!cpu_has_vmx_tpr_shadow())
7669 kvm_x86_ops->update_cr8_intercept = NULL;
7670
7671 if (enable_ept && !cpu_has_vmx_ept_2m_page())
7672 kvm_disable_largepages();
7673
7674 #if IS_ENABLED(CONFIG_HYPERV)
7675 if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH
7676 && enable_ept) {
7677 kvm_x86_ops->tlb_remote_flush = hv_remote_flush_tlb;
7678 kvm_x86_ops->tlb_remote_flush_with_range =
7679 hv_remote_flush_tlb_with_range;
7680 }
7681 #endif
7682
7683 if (!cpu_has_vmx_ple()) {
7684 ple_gap = 0;
7685 ple_window = 0;
7686 ple_window_grow = 0;
7687 ple_window_max = 0;
7688 ple_window_shrink = 0;
7689 }
7690
7691 if (!cpu_has_vmx_apicv()) {
7692 enable_apicv = 0;
7693 kvm_x86_ops->sync_pir_to_irr = NULL;
7694 }
7695
7696 if (cpu_has_vmx_tsc_scaling()) {
7697 kvm_has_tsc_control = true;
7698 kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX;
7699 kvm_tsc_scaling_ratio_frac_bits = 48;
7700 }
7701
7702 set_bit(0, vmx_vpid_bitmap);
7703
7704 if (enable_ept)
7705 vmx_enable_tdp();
7706 else
7707 kvm_disable_tdp();
7708
7709 /*
7710  * Only enable PML when hardware supports PML feature, and both EPT
7711  * and EPT A/D bit features are enabled -- PML depends on them to work.
7712  */
7713 if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml())
7714 enable_pml = 0;
7715
7716 if (!enable_pml) {
7717 kvm_x86_ops->slot_enable_log_dirty = NULL;
7718 kvm_x86_ops->slot_disable_log_dirty = NULL;
7719 kvm_x86_ops->flush_log_dirty = NULL;
7720 kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
7721 }
7722
7723 if (!cpu_has_vmx_preemption_timer())
7724 enable_preemption_timer = false;
7725
7726 if (enable_preemption_timer) {
7727 u64 use_timer_freq = 5000ULL * 1000 * 1000;
7728 u64 vmx_msr;
7729
7730 rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
7731 cpu_preemption_timer_multi =
7732 vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
7733
7734 if (tsc_khz)
7735 use_timer_freq = (u64)tsc_khz * 1000;
7736 use_timer_freq >>= cpu_preemption_timer_multi;
7737
7738 /*
7739  * KVM "disables" the preemption timer by setting it to its max
7740  * value. Don't use the timer if it might cause spurious exits
7741  * at a rate faster than 0.1 Hz (of uninterrupted guest time).
7742  */
7743 if (use_timer_freq > 0xffffffffu / 10)
7744 enable_preemption_timer = false;
7745 }
7746
7747 if (!enable_preemption_timer) {
7748 kvm_x86_ops->set_hv_timer = NULL;
7749 kvm_x86_ops->cancel_hv_timer = NULL;
7750 kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit;
7751 }
7752
7753 kvm_set_posted_intr_wakeup_handler(wakeup_handler);
7754
7755 kvm_mce_cap_supported |= MCG_LMCE_P;
7756
7757 if (pt_mode != PT_MODE_SYSTEM && pt_mode != PT_MODE_HOST_GUEST)
7758 return -EINVAL;
7759 if (!enable_ept || !cpu_has_vmx_intel_pt())
7760 pt_mode = PT_MODE_SYSTEM;
7761
7762 if (nested) {
7763 nested_vmx_setup_ctls_msrs(&vmcs_config.nested,
7764 vmx_capability.ept);
7765
7766 r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers);
7767 if (r)
7768 return r;
7769 }
7770
7771 r = alloc_kvm_area();
7772 if (r)
7773 nested_vmx_hardware_unsetup();
7774 return r;
7775 }
7776
7777 static __exit void hardware_unsetup(void)
7778 {
7779 if (nested)
7780 nested_vmx_hardware_unsetup();
7781
7782 free_kvm_area();
7783 }
7784
7785 static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
7786 .cpu_has_kvm_support = cpu_has_kvm_support,
7787 .disabled_by_bios = vmx_disabled_by_bios,
7788 .hardware_setup = hardware_setup,
7789 .hardware_unsetup = hardware_unsetup,
7790 .check_processor_compatibility = vmx_check_processor_compat,
7791 .hardware_enable = hardware_enable,
7792 .hardware_disable = hardware_disable,
7793 .cpu_has_accelerated_tpr = report_flexpriority,
7794 .has_emulated_msr = vmx_has_emulated_msr,
7795
7796 .vm_init = vmx_vm_init,
7797 .vm_alloc = vmx_vm_alloc,
7798 .vm_free = vmx_vm_free,
7799
7800 .vcpu_create = vmx_create_vcpu,
7801 .vcpu_free = vmx_free_vcpu,
7802 .vcpu_reset = vmx_vcpu_reset,
7803
7804 .prepare_guest_switch = vmx_prepare_switch_to_guest,
7805 .vcpu_load = vmx_vcpu_load,
7806 .vcpu_put = vmx_vcpu_put,
7807
7808 .update_bp_intercept = update_exception_bitmap,
7809 .get_msr_feature = vmx_get_msr_feature,
7810 .get_msr = vmx_get_msr,
7811 .set_msr = vmx_set_msr,
7812 .get_segment_base = vmx_get_segment_base,
7813 .get_segment = vmx_get_segment,
7814 .set_segment = vmx_set_segment,
7815 .get_cpl = vmx_get_cpl,
7816 .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
7817 .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
7818 .decache_cr3 = vmx_decache_cr3,
7819 .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
7820 .set_cr0 = vmx_set_cr0,
7821 .set_cr3 = vmx_set_cr3,
7822 .set_cr4 = vmx_set_cr4,
7823 .set_efer = vmx_set_efer,
7824 .get_idt = vmx_get_idt,
7825 .set_idt = vmx_set_idt,
7826 .get_gdt = vmx_get_gdt,
7827 .set_gdt = vmx_set_gdt,
7828 .get_dr6 = vmx_get_dr6,
7829 .set_dr6 = vmx_set_dr6,
7830 .set_dr7 = vmx_set_dr7,
7831 .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
7832 .cache_reg = vmx_cache_reg,
7833 .get_rflags = vmx_get_rflags,
7834 .set_rflags = vmx_set_rflags,
7835
7836 .tlb_flush = vmx_flush_tlb,
7837 .tlb_flush_gva = vmx_flush_tlb_gva,
7838
7839 .run = vmx_vcpu_run,
7840 .handle_exit = vmx_handle_exit,
7841 .skip_emulated_instruction = skip_emulated_instruction,
7842 .set_interrupt_shadow = vmx_set_interrupt_shadow,
7843 .get_interrupt_shadow = vmx_get_interrupt_shadow,
7844 .patch_hypercall = vmx_patch_hypercall,
7845 .set_irq = vmx_inject_irq,
7846 .set_nmi = vmx_inject_nmi,
7847 .queue_exception = vmx_queue_exception,
7848 .cancel_injection = vmx_cancel_injection,
7849 .interrupt_allowed = vmx_interrupt_allowed,
7850 .nmi_allowed = vmx_nmi_allowed,
7851 .get_nmi_mask = vmx_get_nmi_mask,
7852 .set_nmi_mask = vmx_set_nmi_mask,
7853 .enable_nmi_window = enable_nmi_window,
7854 .enable_irq_window = enable_irq_window,
7855 .update_cr8_intercept = update_cr8_intercept,
7856 .set_virtual_apic_mode = vmx_set_virtual_apic_mode,
7857 .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
7858 .get_enable_apicv = vmx_get_enable_apicv,
7859 .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
7860 .load_eoi_exitmap = vmx_load_eoi_exitmap,
7861 .apicv_post_state_restore = vmx_apicv_post_state_restore,
7862 .hwapic_irr_update = vmx_hwapic_irr_update,
7863 .hwapic_isr_update = vmx_hwapic_isr_update,
7864 .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
7865 .sync_pir_to_irr = vmx_sync_pir_to_irr,
7866 .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
7867 .dy_apicv_has_pending_interrupt = vmx_dy_apicv_has_pending_interrupt,
7868
7869 .set_tss_addr = vmx_set_tss_addr,
7870 .set_identity_map_addr = vmx_set_identity_map_addr,
7871 .get_tdp_level = get_ept_level,
7872 .get_mt_mask = vmx_get_mt_mask,
7873
7874 .get_exit_info = vmx_get_exit_info,
7875
7876 .get_lpage_level = vmx_get_lpage_level,
7877
7878 .cpuid_update = vmx_cpuid_update,
7879
7880 .rdtscp_supported = vmx_rdtscp_supported,
7881 .invpcid_supported = vmx_invpcid_supported,
7882
7883 .set_supported_cpuid = vmx_set_supported_cpuid,
7884
7885 .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
7886
7887 .read_l1_tsc_offset = vmx_read_l1_tsc_offset,
7888 .write_l1_tsc_offset = vmx_write_l1_tsc_offset,
7889
7890 .set_tdp_cr3 = vmx_set_cr3,
7891
7892 .check_intercept = vmx_check_intercept,
7893 .handle_exit_irqoff = vmx_handle_exit_irqoff,
7894 .mpx_supported = vmx_mpx_supported,
7895 .xsaves_supported = vmx_xsaves_supported,
7896 .umip_emulated = vmx_umip_emulated,
7897 .pt_supported = vmx_pt_supported,
7898 .pku_supported = vmx_pku_supported,
7899
7900 .request_immediate_exit = vmx_request_immediate_exit,
7901
7902 .sched_in = vmx_sched_in,
7903
7904 .slot_enable_log_dirty = vmx_slot_enable_log_dirty,
7905 .slot_disable_log_dirty = vmx_slot_disable_log_dirty,
7906 .flush_log_dirty = vmx_flush_log_dirty,
7907 .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
7908 .write_log_dirty = vmx_write_pml_buffer,
7909
7910 .pre_block = vmx_pre_block,
7911 .post_block = vmx_post_block,
7912
7913 .pmu_ops = &intel_pmu_ops,
7914
7915 .update_pi_irte = vmx_update_pi_irte,
7916
7917 #ifdef CONFIG_X86_64
7918 .set_hv_timer = vmx_set_hv_timer,
7919 .cancel_hv_timer = vmx_cancel_hv_timer,
7920 #endif
7921
7922 .setup_mce = vmx_setup_mce,
7923
7924 .smi_allowed = vmx_smi_allowed,
7925 .pre_enter_smm = vmx_pre_enter_smm,
7926 .pre_leave_smm = vmx_pre_leave_smm,
7927 .enable_smi_window = enable_smi_window,
7928
7929 .check_nested_events = NULL,
7930 .get_nested_state = NULL,
7931 .set_nested_state = NULL,
7932 .get_vmcs12_pages = NULL,
7933 .nested_enable_evmcs = NULL,
7934 .nested_get_evmcs_version = NULL,
7935 .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault,
7936 .apic_init_signal_blocked = vmx_apic_init_signal_blocked,
7937 };
7938
7939 static void vmx_cleanup_l1d_flush(void)
7940 {
7941 if (vmx_l1d_flush_pages) {
7942 free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER);
7943 vmx_l1d_flush_pages = NULL;
7944 }
7945
7946 l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
7947 }
7948
7949 static void vmx_exit(void)
7950 {
7951 #ifdef CONFIG_KEXEC_CORE
7952 RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
7953 synchronize_rcu();
7954 #endif
7955
7956 kvm_exit();
7957
7958 #if IS_ENABLED(CONFIG_HYPERV)
7959 if (static_branch_unlikely(&enable_evmcs)) {
7960 int cpu;
7961 struct hv_vp_assist_page *vp_ap;
7962
7963 /*
7964  * Reset everything to support using non-enlightened VMCS access later
7965  * (e.g. when we reload the module with enlightened_vmcs=0)
7966  */
7967 for_each_online_cpu(cpu) {
7968 vp_ap = hv_get_vp_assist_page(cpu);
7969
7970 if (!vp_ap)
7971 continue;
7972
7973 vp_ap->nested_control.features.directhypercall = 0;
7974 vp_ap->current_nested_vmcs = 0;
7975 vp_ap->enlighten_vmentry = 0;
7976 }
7977
7978 static_branch_disable(&enable_evmcs);
7979 }
7980 #endif
7981 vmx_cleanup_l1d_flush();
7982 }
7983 module_exit(vmx_exit);
7984
7985 static int __init vmx_init(void)
7986 {
7987 int r, cpu;
7988
7989 #if IS_ENABLED(CONFIG_HYPERV)
7990 /*
7991  * Enlightened VMCS usage should be recommended and the host needs
7992  * to support eVMCS v1 or above. We can also disable eVMCS support
7993  * with module parameter.
7994  */
7995 if (enlightened_vmcs &&
7996 ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED &&
7997 (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >=
7998 KVM_EVMCS_VERSION) {
7999 int cpu;
8000
8001
8002 for_each_online_cpu(cpu) {
8003 if (!hv_get_vp_assist_page(cpu)) {
8004 enlightened_vmcs = false;
8005 break;
8006 }
8007 }
8008
8009 if (enlightened_vmcs) {
8010 pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n");
8011 static_branch_enable(&enable_evmcs);
8012 }
8013
8014 if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH)
8015 vmx_x86_ops.enable_direct_tlbflush
8016 = hv_enable_direct_tlbflush;
8017
8018 } else {
8019 enlightened_vmcs = false;
8020 }
8021 #endif
8022
8023 r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
8024 __alignof__(struct vcpu_vmx), THIS_MODULE);
8025 if (r)
8026 return r;
8027
8028 /*
8029  * Must be called after kvm_init() so enable_ept is properly set
8030  * up. Hand the parameter mitigation value in which was stored in
8031  * the pre module init parser. If no parameter was given, it will
8032  * contain 'auto' which will be turned into the default 'cond'
8033  * option.
8034  */
8035 r = vmx_setup_l1d_flush(vmentry_l1d_flush_param);
8036 if (r) {
8037 vmx_exit();
8038 return r;
8039 }
8040
8041 for_each_possible_cpu(cpu) {
8042 INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
8043 INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
8044 spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
8045 }
8046
8047 #ifdef CONFIG_KEXEC_CORE
8048 rcu_assign_pointer(crash_vmclear_loaded_vmcss,
8049 crash_vmclear_local_loaded_vmcss);
8050 #endif
8051 vmx_check_vmcs12_offsets();
8052
8053 return 0;
8054 }
8055 module_init(vmx_init);