This source file includes following definitions.
- vmx_setup_l1d_flush
 
- vmentry_l1d_flush_parse
 
- vmentry_l1d_flush_set
 
- vmentry_l1d_flush_get
 
- vmread_error
 
- vmwrite_error
 
- vmclear_error
 
- vmptrld_error
 
- invvpid_error
 
- invept_error
 
- check_ept_pointer_match
 
- kvm_fill_hv_flush_list_func
 
- __hv_remote_flush_tlb_with_range
 
- hv_remote_flush_tlb_with_range
 
- hv_remote_flush_tlb
 
- hv_enable_direct_tlbflush
 
- cpu_has_broken_vmx_preemption_timer
 
- cpu_need_virtualize_apic_accesses
 
- report_flexpriority
 
- __find_msr_index
 
- find_msr_entry
 
- loaded_vmcs_init
 
- crash_vmclear_local_loaded_vmcss
 
- __loaded_vmcs_clear
 
- loaded_vmcs_clear
 
- vmx_segment_cache_test_set
 
- vmx_read_guest_seg_selector
 
- vmx_read_guest_seg_base
 
- vmx_read_guest_seg_limit
 
- vmx_read_guest_seg_ar
 
- update_exception_bitmap
 
- msr_write_intercepted
 
- clear_atomic_switch_msr_special
 
- find_msr
 
- clear_atomic_switch_msr
 
- add_atomic_switch_msr_special
 
- add_atomic_switch_msr
 
- update_transition_efer
 
- segment_base
 
- pt_load_msr
 
- pt_save_msr
 
- pt_guest_enter
 
- pt_guest_exit
 
- vmx_set_host_fs_gs
 
- vmx_prepare_switch_to_guest
 
- vmx_prepare_switch_to_host
 
- vmx_read_guest_kernel_gs_base
 
- vmx_write_guest_kernel_gs_base
 
- vmx_vcpu_pi_load
 
- vmx_vcpu_load_vmcs
 
- vmx_vcpu_load
 
- vmx_vcpu_pi_put
 
- vmx_vcpu_put
 
- emulation_required
 
- vmx_get_rflags
 
- vmx_set_rflags
 
- vmx_get_interrupt_shadow
 
- vmx_set_interrupt_shadow
 
- vmx_rtit_ctl_check
 
- skip_emulated_instruction
 
- vmx_clear_hlt
 
- vmx_queue_exception
 
- vmx_rdtscp_supported
 
- vmx_invpcid_supported
 
- move_msr_up
 
- setup_msrs
 
- vmx_read_l1_tsc_offset
 
- vmx_write_l1_tsc_offset
 
- nested_vmx_allowed
 
- vmx_feature_control_msr_valid
 
- vmx_get_msr_feature
 
- vmx_get_msr
 
- vmx_set_msr
 
- vmx_cache_reg
 
- cpu_has_kvm_support
 
- vmx_disabled_by_bios
 
- kvm_cpu_vmxon
 
- hardware_enable
 
- vmclear_local_loaded_vmcss
 
- kvm_cpu_vmxoff
 
- hardware_disable
 
- adjust_vmx_controls
 
- setup_vmcs_config
 
- alloc_vmcs_cpu
 
- free_vmcs
 
- free_loaded_vmcs
 
- alloc_loaded_vmcs
 
- free_kvm_area
 
- alloc_kvm_area
 
- fix_pmode_seg
 
- enter_pmode
 
- fix_rmode_seg
 
- enter_rmode
 
- vmx_set_efer
 
- enter_lmode
 
- exit_lmode
 
- vmx_flush_tlb_gva
 
- vmx_decache_cr0_guest_bits
 
- vmx_decache_cr3
 
- vmx_decache_cr4_guest_bits
 
- ept_load_pdptrs
 
- ept_save_pdptrs
 
- ept_update_paging_mode_cr0
 
- vmx_set_cr0
 
- get_ept_level
 
- construct_eptp
 
- vmx_set_cr3
 
- vmx_set_cr4
 
- vmx_get_segment
 
- vmx_get_segment_base
 
- vmx_get_cpl
 
- vmx_segment_access_rights
 
- vmx_set_segment
 
- vmx_get_cs_db_l_bits
 
- vmx_get_idt
 
- vmx_set_idt
 
- vmx_get_gdt
 
- vmx_set_gdt
 
- rmode_segment_valid
 
- code_segment_valid
 
- stack_segment_valid
 
- data_segment_valid
 
- tr_valid
 
- ldtr_valid
 
- cs_ss_rpl_check
 
- guest_state_valid
 
- init_rmode_tss
 
- init_rmode_identity_map
 
- seg_setup
 
- alloc_apic_access_page
 
- allocate_vpid
 
- free_vpid
 
- vmx_disable_intercept_for_msr
 
- vmx_enable_intercept_for_msr
 
- vmx_set_intercept_for_msr
 
- vmx_msr_bitmap_mode
 
- vmx_update_msr_bitmap_x2apic
 
- vmx_update_msr_bitmap
 
- pt_update_intercept_for_msr
 
- vmx_get_enable_apicv
 
- vmx_guest_apic_has_interrupt
 
- kvm_vcpu_trigger_posted_interrupt
 
- vmx_deliver_nested_posted_interrupt
 
- vmx_deliver_posted_interrupt
 
- vmx_set_constant_host_state
 
- set_cr4_guest_host_mask
 
- vmx_pin_based_exec_ctrl
 
- vmx_refresh_apicv_exec_ctrl
 
- vmx_exec_control
 
- vmx_compute_secondary_exec_control
 
- ept_set_mmio_spte_mask
 
- vmx_vcpu_setup
 
- vmx_vcpu_reset
 
- enable_irq_window
 
- enable_nmi_window
 
- vmx_inject_irq
 
- vmx_inject_nmi
 
- vmx_get_nmi_mask
 
- vmx_set_nmi_mask
 
- vmx_nmi_allowed
 
- vmx_interrupt_allowed
 
- vmx_set_tss_addr
 
- vmx_set_identity_map_addr
 
- rmode_exception
 
- handle_rmode_exception
 
- kvm_machine_check
 
- handle_machine_check
 
- handle_exception_nmi
 
- handle_external_interrupt
 
- handle_triple_fault
 
- handle_io
 
- vmx_patch_hypercall
 
- handle_set_cr0
 
- handle_set_cr4
 
- handle_desc
 
- handle_cr
 
- handle_dr
 
- vmx_get_dr6
 
- vmx_set_dr6
 
- vmx_sync_dirty_debug_regs
 
- vmx_set_dr7
 
- handle_cpuid
 
- handle_rdmsr
 
- handle_wrmsr
 
- handle_tpr_below_threshold
 
- handle_interrupt_window
 
- handle_halt
 
- handle_vmcall
 
- handle_invd
 
- handle_invlpg
 
- handle_rdpmc
 
- handle_wbinvd
 
- handle_xsetbv
 
- handle_apic_access
 
- handle_apic_eoi_induced
 
- handle_apic_write
 
- handle_task_switch
 
- handle_ept_violation
 
- handle_ept_misconfig
 
- handle_nmi_window
 
- handle_invalid_guest_state
 
- grow_ple_window
 
- shrink_ple_window
 
- wakeup_handler
 
- vmx_enable_tdp
 
- handle_pause
 
- handle_nop
 
- handle_mwait
 
- handle_invalid_op
 
- handle_monitor_trap
 
- handle_monitor
 
- handle_invpcid
 
- handle_pml_full
 
- handle_preemption_timer
 
- handle_vmx_instruction
 
- handle_encls
 
- vmx_get_exit_info
 
- vmx_destroy_pml_buffer
 
- vmx_flush_pml_buffer
 
- kvm_flush_pml_buffers
 
- vmx_dump_sel
 
- vmx_dump_dtsel
 
- dump_vmcs
 
- vmx_handle_exit
 
- vmx_l1d_flush
 
- update_cr8_intercept
 
- vmx_set_virtual_apic_mode
 
- vmx_set_apic_access_page_addr
 
- vmx_hwapic_isr_update
 
- vmx_set_rvi
 
- vmx_hwapic_irr_update
 
- vmx_sync_pir_to_irr
 
- vmx_dy_apicv_has_pending_interrupt
 
- vmx_load_eoi_exitmap
 
- vmx_apicv_post_state_restore
 
- handle_exception_nmi_irqoff
 
- handle_external_interrupt_irqoff
 
- vmx_handle_exit_irqoff
 
- vmx_has_emulated_msr
 
- vmx_pt_supported
 
- vmx_recover_nmi_blocking
 
- __vmx_complete_interrupts
 
- vmx_complete_interrupts
 
- vmx_cancel_injection
 
- atomic_switch_perf_msrs
 
- atomic_switch_umwait_control_msr
 
- vmx_update_hv_timer
 
- vmx_update_host_rsp
 
- vmx_vcpu_run
 
- vmx_vm_alloc
 
- vmx_vm_free
 
- vmx_free_vcpu
 
- vmx_create_vcpu
 
- vmx_vm_init
 
- vmx_check_processor_compat
 
- vmx_get_mt_mask
 
- vmx_get_lpage_level
 
- vmcs_set_secondary_exec_control
 
- nested_vmx_cr_fixed1_bits_update
 
- nested_vmx_entry_exit_ctls_update
 
- update_intel_pt_cfg
 
- vmx_cpuid_update
 
- vmx_set_supported_cpuid
 
- vmx_request_immediate_exit
 
- vmx_check_intercept_io
 
- vmx_check_intercept
 
- u64_shl_div_u64
 
- vmx_set_hv_timer
 
- vmx_cancel_hv_timer
 
- vmx_sched_in
 
- vmx_slot_enable_log_dirty
 
- vmx_slot_disable_log_dirty
 
- vmx_flush_log_dirty
 
- vmx_write_pml_buffer
 
- vmx_enable_log_dirty_pt_masked
 
- __pi_post_block
 
- pi_pre_block
 
- vmx_pre_block
 
- pi_post_block
 
- vmx_post_block
 
- vmx_update_pi_irte
 
- vmx_setup_mce
 
- vmx_smi_allowed
 
- vmx_pre_enter_smm
 
- vmx_pre_leave_smm
 
- enable_smi_window
 
- vmx_need_emulation_on_page_fault
 
- vmx_apic_init_signal_blocked
 
- hardware_setup
 
- hardware_unsetup
 
- vmx_cleanup_l1d_flush
 
- vmx_exit
 
- vmx_init
 
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 #include <linux/frame.h>
  17 #include <linux/highmem.h>
  18 #include <linux/hrtimer.h>
  19 #include <linux/kernel.h>
  20 #include <linux/kvm_host.h>
  21 #include <linux/module.h>
  22 #include <linux/moduleparam.h>
  23 #include <linux/mod_devicetable.h>
  24 #include <linux/mm.h>
  25 #include <linux/sched.h>
  26 #include <linux/sched/smt.h>
  27 #include <linux/slab.h>
  28 #include <linux/tboot.h>
  29 #include <linux/trace_events.h>
  30 
  31 #include <asm/apic.h>
  32 #include <asm/asm.h>
  33 #include <asm/cpu.h>
  34 #include <asm/debugreg.h>
  35 #include <asm/desc.h>
  36 #include <asm/fpu/internal.h>
  37 #include <asm/io.h>
  38 #include <asm/irq_remapping.h>
  39 #include <asm/kexec.h>
  40 #include <asm/perf_event.h>
  41 #include <asm/mce.h>
  42 #include <asm/mmu_context.h>
  43 #include <asm/mshyperv.h>
  44 #include <asm/spec-ctrl.h>
  45 #include <asm/virtext.h>
  46 #include <asm/vmx.h>
  47 
  48 #include "capabilities.h"
  49 #include "cpuid.h"
  50 #include "evmcs.h"
  51 #include "irq.h"
  52 #include "kvm_cache_regs.h"
  53 #include "lapic.h"
  54 #include "mmu.h"
  55 #include "nested.h"
  56 #include "ops.h"
  57 #include "pmu.h"
  58 #include "trace.h"
  59 #include "vmcs.h"
  60 #include "vmcs12.h"
  61 #include "vmx.h"
  62 #include "x86.h"
  63 
  64 MODULE_AUTHOR("Qumranet");
  65 MODULE_LICENSE("GPL");
  66 
  67 static const struct x86_cpu_id vmx_cpu_id[] = {
  68         X86_FEATURE_MATCH(X86_FEATURE_VMX),
  69         {}
  70 };
  71 MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
  72 
  73 bool __read_mostly enable_vpid = 1;
  74 module_param_named(vpid, enable_vpid, bool, 0444);
  75 
  76 static bool __read_mostly enable_vnmi = 1;
  77 module_param_named(vnmi, enable_vnmi, bool, S_IRUGO);
  78 
  79 bool __read_mostly flexpriority_enabled = 1;
  80 module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
  81 
  82 bool __read_mostly enable_ept = 1;
  83 module_param_named(ept, enable_ept, bool, S_IRUGO);
  84 
  85 bool __read_mostly enable_unrestricted_guest = 1;
  86 module_param_named(unrestricted_guest,
  87                         enable_unrestricted_guest, bool, S_IRUGO);
  88 
  89 bool __read_mostly enable_ept_ad_bits = 1;
  90 module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);
  91 
  92 static bool __read_mostly emulate_invalid_guest_state = true;
  93 module_param(emulate_invalid_guest_state, bool, S_IRUGO);
  94 
  95 static bool __read_mostly fasteoi = 1;
  96 module_param(fasteoi, bool, S_IRUGO);
  97 
  98 bool __read_mostly enable_apicv = 1;
  99 module_param(enable_apicv, bool, S_IRUGO);
 100 
 101 
 102 
 103 
 104 
 105 
 106 static bool __read_mostly nested = 1;
 107 module_param(nested, bool, S_IRUGO);
 108 
 109 static u64 __read_mostly host_xss;
 110 
 111 bool __read_mostly enable_pml = 1;
 112 module_param_named(pml, enable_pml, bool, S_IRUGO);
 113 
 114 static bool __read_mostly dump_invalid_vmcs = 0;
 115 module_param(dump_invalid_vmcs, bool, 0644);
 116 
 117 #define MSR_BITMAP_MODE_X2APIC          1
 118 #define MSR_BITMAP_MODE_X2APIC_APICV    2
 119 
 120 #define KVM_VMX_TSC_MULTIPLIER_MAX     0xffffffffffffffffULL
 121 
 122 
 123 static int __read_mostly cpu_preemption_timer_multi;
 124 static bool __read_mostly enable_preemption_timer = 1;
 125 #ifdef CONFIG_X86_64
 126 module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
 127 #endif
 128 
 129 #define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD)
 130 #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE
 131 #define KVM_VM_CR0_ALWAYS_ON                            \
 132         (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST |      \
 133          X86_CR0_WP | X86_CR0_PG | X86_CR0_PE)
 134 #define KVM_CR4_GUEST_OWNED_BITS                                      \
 135         (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR      \
 136          | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD)
 137 
 138 #define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE
 139 #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
 140 #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
 141 
 142 #define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
 143 
 144 #define MSR_IA32_RTIT_STATUS_MASK (~(RTIT_STATUS_FILTEREN | \
 145         RTIT_STATUS_CONTEXTEN | RTIT_STATUS_TRIGGEREN | \
 146         RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \
 147         RTIT_STATUS_BYTECNT))
 148 
 149 #define MSR_IA32_RTIT_OUTPUT_BASE_MASK \
 150         (~((1UL << cpuid_query_maxphyaddr(vcpu)) - 1) | 0x7f)
 151 
 152 
 153 
 154 
 155 
 156 
 157 
 158 
 159 
 160 
 161 
 162 
 163 static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP;
 164 module_param(ple_gap, uint, 0444);
 165 
 166 static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
 167 module_param(ple_window, uint, 0444);
 168 
 169 
 170 static unsigned int ple_window_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
 171 module_param(ple_window_grow, uint, 0444);
 172 
 173 
 174 static unsigned int ple_window_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
 175 module_param(ple_window_shrink, uint, 0444);
 176 
 177 
 178 static unsigned int ple_window_max        = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
 179 module_param(ple_window_max, uint, 0444);
 180 
 181 
 182 int __read_mostly pt_mode = PT_MODE_SYSTEM;
 183 module_param(pt_mode, int, S_IRUGO);
 184 
 185 static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
 186 static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond);
 187 static DEFINE_MUTEX(vmx_l1d_flush_mutex);
 188 
 189 
 190 static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_L1D_FLUSH_AUTO;
 191 
 192 static const struct {
 193         const char *option;
 194         bool for_parse;
 195 } vmentry_l1d_param[] = {
 196         [VMENTER_L1D_FLUSH_AUTO]         = {"auto", true},
 197         [VMENTER_L1D_FLUSH_NEVER]        = {"never", true},
 198         [VMENTER_L1D_FLUSH_COND]         = {"cond", true},
 199         [VMENTER_L1D_FLUSH_ALWAYS]       = {"always", true},
 200         [VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false},
 201         [VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false},
 202 };
 203 
 204 #define L1D_CACHE_ORDER 4
 205 static void *vmx_l1d_flush_pages;
 206 
 207 static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
 208 {
 209         struct page *page;
 210         unsigned int i;
 211 
 212         if (!boot_cpu_has_bug(X86_BUG_L1TF)) {
 213                 l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
 214                 return 0;
 215         }
 216 
 217         if (!enable_ept) {
 218                 l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED;
 219                 return 0;
 220         }
 221 
 222         if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
 223                 u64 msr;
 224 
 225                 rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
 226                 if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
 227                         l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
 228                         return 0;
 229                 }
 230         }
 231 
 232         
 233         if (l1tf == VMENTER_L1D_FLUSH_AUTO) {
 234                 switch (l1tf_mitigation) {
 235                 case L1TF_MITIGATION_OFF:
 236                         l1tf = VMENTER_L1D_FLUSH_NEVER;
 237                         break;
 238                 case L1TF_MITIGATION_FLUSH_NOWARN:
 239                 case L1TF_MITIGATION_FLUSH:
 240                 case L1TF_MITIGATION_FLUSH_NOSMT:
 241                         l1tf = VMENTER_L1D_FLUSH_COND;
 242                         break;
 243                 case L1TF_MITIGATION_FULL:
 244                 case L1TF_MITIGATION_FULL_FORCE:
 245                         l1tf = VMENTER_L1D_FLUSH_ALWAYS;
 246                         break;
 247                 }
 248         } else if (l1tf_mitigation == L1TF_MITIGATION_FULL_FORCE) {
 249                 l1tf = VMENTER_L1D_FLUSH_ALWAYS;
 250         }
 251 
 252         if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages &&
 253             !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) {
 254                 
 255 
 256 
 257 
 258                 page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER);
 259                 if (!page)
 260                         return -ENOMEM;
 261                 vmx_l1d_flush_pages = page_address(page);
 262 
 263                 
 264 
 265 
 266 
 267 
 268                 for (i = 0; i < 1u << L1D_CACHE_ORDER; ++i) {
 269                         memset(vmx_l1d_flush_pages + i * PAGE_SIZE, i + 1,
 270                                PAGE_SIZE);
 271                 }
 272         }
 273 
 274         l1tf_vmx_mitigation = l1tf;
 275 
 276         if (l1tf != VMENTER_L1D_FLUSH_NEVER)
 277                 static_branch_enable(&vmx_l1d_should_flush);
 278         else
 279                 static_branch_disable(&vmx_l1d_should_flush);
 280 
 281         if (l1tf == VMENTER_L1D_FLUSH_COND)
 282                 static_branch_enable(&vmx_l1d_flush_cond);
 283         else
 284                 static_branch_disable(&vmx_l1d_flush_cond);
 285         return 0;
 286 }
 287 
 288 static int vmentry_l1d_flush_parse(const char *s)
 289 {
 290         unsigned int i;
 291 
 292         if (s) {
 293                 for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) {
 294                         if (vmentry_l1d_param[i].for_parse &&
 295                             sysfs_streq(s, vmentry_l1d_param[i].option))
 296                                 return i;
 297                 }
 298         }
 299         return -EINVAL;
 300 }
 301 
 302 static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
 303 {
 304         int l1tf, ret;
 305 
 306         l1tf = vmentry_l1d_flush_parse(s);
 307         if (l1tf < 0)
 308                 return l1tf;
 309 
 310         if (!boot_cpu_has(X86_BUG_L1TF))
 311                 return 0;
 312 
 313         
 314 
 315 
 316 
 317 
 318 
 319         if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_AUTO) {
 320                 vmentry_l1d_flush_param = l1tf;
 321                 return 0;
 322         }
 323 
 324         mutex_lock(&vmx_l1d_flush_mutex);
 325         ret = vmx_setup_l1d_flush(l1tf);
 326         mutex_unlock(&vmx_l1d_flush_mutex);
 327         return ret;
 328 }
 329 
 330 static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
 331 {
 332         if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param)))
 333                 return sprintf(s, "???\n");
 334 
 335         return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
 336 }
 337 
 338 static const struct kernel_param_ops vmentry_l1d_flush_ops = {
 339         .set = vmentry_l1d_flush_set,
 340         .get = vmentry_l1d_flush_get,
 341 };
 342 module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, NULL, 0644);
 343 
 344 static bool guest_state_valid(struct kvm_vcpu *vcpu);
 345 static u32 vmx_segment_access_rights(struct kvm_segment *var);
 346 static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
 347                                                           u32 msr, int type);
 348 
 349 void vmx_vmexit(void);
 350 
 351 #define vmx_insn_failed(fmt...)         \
 352 do {                                    \
 353         WARN_ONCE(1, fmt);              \
 354         pr_warn_ratelimited(fmt);       \
 355 } while (0)
 356 
 357 asmlinkage void vmread_error(unsigned long field, bool fault)
 358 {
 359         if (fault)
 360                 kvm_spurious_fault();
 361         else
 362                 vmx_insn_failed("kvm: vmread failed: field=%lx\n", field);
 363 }
 364 
 365 noinline void vmwrite_error(unsigned long field, unsigned long value)
 366 {
 367         vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%d\n",
 368                         field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
 369 }
 370 
 371 noinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr)
 372 {
 373         vmx_insn_failed("kvm: vmclear failed: %p/%llx\n", vmcs, phys_addr);
 374 }
 375 
 376 noinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr)
 377 {
 378         vmx_insn_failed("kvm: vmptrld failed: %p/%llx\n", vmcs, phys_addr);
 379 }
 380 
 381 noinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva)
 382 {
 383         vmx_insn_failed("kvm: invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n",
 384                         ext, vpid, gva);
 385 }
 386 
 387 noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa)
 388 {
 389         vmx_insn_failed("kvm: invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n",
 390                         ext, eptp, gpa);
 391 }
 392 
 393 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 394 DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 395 
 396 
 397 
 398 
 399 static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
 400 
 401 
 402 
 403 
 404 
 405 static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
 406 static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
 407 
 408 static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
 409 static DEFINE_SPINLOCK(vmx_vpid_lock);
 410 
 411 struct vmcs_config vmcs_config;
 412 struct vmx_capability vmx_capability;
 413 
 414 #define VMX_SEGMENT_FIELD(seg)                                  \
 415         [VCPU_SREG_##seg] = {                                   \
 416                 .selector = GUEST_##seg##_SELECTOR,             \
 417                 .base = GUEST_##seg##_BASE,                     \
 418                 .limit = GUEST_##seg##_LIMIT,                   \
 419                 .ar_bytes = GUEST_##seg##_AR_BYTES,             \
 420         }
 421 
 422 static const struct kvm_vmx_segment_field {
 423         unsigned selector;
 424         unsigned base;
 425         unsigned limit;
 426         unsigned ar_bytes;
 427 } kvm_vmx_segment_fields[] = {
 428         VMX_SEGMENT_FIELD(CS),
 429         VMX_SEGMENT_FIELD(DS),
 430         VMX_SEGMENT_FIELD(ES),
 431         VMX_SEGMENT_FIELD(FS),
 432         VMX_SEGMENT_FIELD(GS),
 433         VMX_SEGMENT_FIELD(SS),
 434         VMX_SEGMENT_FIELD(TR),
 435         VMX_SEGMENT_FIELD(LDTR),
 436 };
 437 
 438 u64 host_efer;
 439 static unsigned long host_idt_base;
 440 
 441 
 442 
 443 
 444 
 445 
 446 
 447 
 448 const u32 vmx_msr_index[] = {
 449 #ifdef CONFIG_X86_64
 450         MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
 451 #endif
 452         MSR_EFER, MSR_TSC_AUX, MSR_STAR,
 453 };
 454 
 455 #if IS_ENABLED(CONFIG_HYPERV)
 456 static bool __read_mostly enlightened_vmcs = true;
 457 module_param(enlightened_vmcs, bool, 0444);
 458 
 459 
 460 static void check_ept_pointer_match(struct kvm *kvm)
 461 {
 462         struct kvm_vcpu *vcpu;
 463         u64 tmp_eptp = INVALID_PAGE;
 464         int i;
 465 
 466         kvm_for_each_vcpu(i, vcpu, kvm) {
 467                 if (!VALID_PAGE(tmp_eptp)) {
 468                         tmp_eptp = to_vmx(vcpu)->ept_pointer;
 469                 } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) {
 470                         to_kvm_vmx(kvm)->ept_pointers_match
 471                                 = EPT_POINTERS_MISMATCH;
 472                         return;
 473                 }
 474         }
 475 
 476         to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH;
 477 }
 478 
 479 static int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush,
 480                 void *data)
 481 {
 482         struct kvm_tlb_range *range = data;
 483 
 484         return hyperv_fill_flush_guest_mapping_list(flush, range->start_gfn,
 485                         range->pages);
 486 }
 487 
 488 static inline int __hv_remote_flush_tlb_with_range(struct kvm *kvm,
 489                 struct kvm_vcpu *vcpu, struct kvm_tlb_range *range)
 490 {
 491         u64 ept_pointer = to_vmx(vcpu)->ept_pointer;
 492 
 493         
 494 
 495 
 496 
 497 
 498         if (range)
 499                 return hyperv_flush_guest_mapping_range(ept_pointer & PAGE_MASK,
 500                                 kvm_fill_hv_flush_list_func, (void *)range);
 501         else
 502                 return hyperv_flush_guest_mapping(ept_pointer & PAGE_MASK);
 503 }
 504 
 505 static int hv_remote_flush_tlb_with_range(struct kvm *kvm,
 506                 struct kvm_tlb_range *range)
 507 {
 508         struct kvm_vcpu *vcpu;
 509         int ret = 0, i;
 510 
 511         spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
 512 
 513         if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK)
 514                 check_ept_pointer_match(kvm);
 515 
 516         if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) {
 517                 kvm_for_each_vcpu(i, vcpu, kvm) {
 518                         
 519                         if (VALID_PAGE(to_vmx(vcpu)->ept_pointer))
 520                                 ret |= __hv_remote_flush_tlb_with_range(
 521                                         kvm, vcpu, range);
 522                 }
 523         } else {
 524                 ret = __hv_remote_flush_tlb_with_range(kvm,
 525                                 kvm_get_vcpu(kvm, 0), range);
 526         }
 527 
 528         spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
 529         return ret;
 530 }
 531 static int hv_remote_flush_tlb(struct kvm *kvm)
 532 {
 533         return hv_remote_flush_tlb_with_range(kvm, NULL);
 534 }
 535 
 536 static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
 537 {
 538         struct hv_enlightened_vmcs *evmcs;
 539         struct hv_partition_assist_pg **p_hv_pa_pg =
 540                         &vcpu->kvm->arch.hyperv.hv_pa_pg;
 541         
 542 
 543 
 544 
 545         if (!*p_hv_pa_pg)
 546                 *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL);
 547 
 548         if (!*p_hv_pa_pg)
 549                 return -ENOMEM;
 550 
 551         evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs;
 552 
 553         evmcs->partition_assist_page =
 554                 __pa(*p_hv_pa_pg);
 555         evmcs->hv_vm_id = (unsigned long)vcpu->kvm;
 556         evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
 557 
 558         return 0;
 559 }
 560 
 561 #endif 
 562 
 563 
 564 
 565 
 566 
 567 
 568 static u32 vmx_preemption_cpu_tfms[] = {
 569 
 570 0x000206E6,
 571 
 572 
 573 
 574 0x00020652,
 575 
 576 0x00020655,
 577 
 578 
 579 
 580 
 581 
 582 
 583 0x000106E5,
 584 
 585 0x000106A0,
 586 
 587 0x000106A1,
 588 
 589 0x000106A4,
 590  
 591  
 592  
 593 0x000106A5,
 594  
 595 0x000306A8,
 596 };
 597 
 598 static inline bool cpu_has_broken_vmx_preemption_timer(void)
 599 {
 600         u32 eax = cpuid_eax(0x00000001), i;
 601 
 602         
 603         eax &= ~(0x3U << 14 | 0xfU << 28);
 604         for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++)
 605                 if (eax == vmx_preemption_cpu_tfms[i])
 606                         return true;
 607 
 608         return false;
 609 }
 610 
 611 static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
 612 {
 613         return flexpriority_enabled && lapic_in_kernel(vcpu);
 614 }
 615 
 616 static inline bool report_flexpriority(void)
 617 {
 618         return flexpriority_enabled;
 619 }
 620 
 621 static inline int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
 622 {
 623         int i;
 624 
 625         for (i = 0; i < vmx->nmsrs; ++i)
 626                 if (vmx_msr_index[vmx->guest_msrs[i].index] == msr)
 627                         return i;
 628         return -1;
 629 }
 630 
 631 struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
 632 {
 633         int i;
 634 
 635         i = __find_msr_index(vmx, msr);
 636         if (i >= 0)
 637                 return &vmx->guest_msrs[i];
 638         return NULL;
 639 }
 640 
 641 void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
 642 {
 643         vmcs_clear(loaded_vmcs->vmcs);
 644         if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
 645                 vmcs_clear(loaded_vmcs->shadow_vmcs);
 646         loaded_vmcs->cpu = -1;
 647         loaded_vmcs->launched = 0;
 648 }
 649 
 650 #ifdef CONFIG_KEXEC_CORE
 651 static void crash_vmclear_local_loaded_vmcss(void)
 652 {
 653         int cpu = raw_smp_processor_id();
 654         struct loaded_vmcs *v;
 655 
 656         list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
 657                             loaded_vmcss_on_cpu_link)
 658                 vmcs_clear(v->vmcs);
 659 }
 660 #endif 
 661 
 662 static void __loaded_vmcs_clear(void *arg)
 663 {
 664         struct loaded_vmcs *loaded_vmcs = arg;
 665         int cpu = raw_smp_processor_id();
 666 
 667         if (loaded_vmcs->cpu != cpu)
 668                 return; 
 669         if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
 670                 per_cpu(current_vmcs, cpu) = NULL;
 671 
 672         vmcs_clear(loaded_vmcs->vmcs);
 673         if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
 674                 vmcs_clear(loaded_vmcs->shadow_vmcs);
 675 
 676         list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
 677 
 678         
 679 
 680 
 681 
 682 
 683 
 684 
 685         smp_wmb();
 686 
 687         loaded_vmcs->cpu = -1;
 688         loaded_vmcs->launched = 0;
 689 }
 690 
 691 void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
 692 {
 693         int cpu = loaded_vmcs->cpu;
 694 
 695         if (cpu != -1)
 696                 smp_call_function_single(cpu,
 697                          __loaded_vmcs_clear, loaded_vmcs, 1);
 698 }
 699 
 700 static bool vmx_segment_cache_test_set(struct vcpu_vmx *vmx, unsigned seg,
 701                                        unsigned field)
 702 {
 703         bool ret;
 704         u32 mask = 1 << (seg * SEG_FIELD_NR + field);
 705 
 706         if (!(vmx->vcpu.arch.regs_avail & (1 << VCPU_EXREG_SEGMENTS))) {
 707                 vmx->vcpu.arch.regs_avail |= (1 << VCPU_EXREG_SEGMENTS);
 708                 vmx->segment_cache.bitmask = 0;
 709         }
 710         ret = vmx->segment_cache.bitmask & mask;
 711         vmx->segment_cache.bitmask |= mask;
 712         return ret;
 713 }
 714 
 715 static u16 vmx_read_guest_seg_selector(struct vcpu_vmx *vmx, unsigned seg)
 716 {
 717         u16 *p = &vmx->segment_cache.seg[seg].selector;
 718 
 719         if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_SEL))
 720                 *p = vmcs_read16(kvm_vmx_segment_fields[seg].selector);
 721         return *p;
 722 }
 723 
 724 static ulong vmx_read_guest_seg_base(struct vcpu_vmx *vmx, unsigned seg)
 725 {
 726         ulong *p = &vmx->segment_cache.seg[seg].base;
 727 
 728         if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_BASE))
 729                 *p = vmcs_readl(kvm_vmx_segment_fields[seg].base);
 730         return *p;
 731 }
 732 
 733 static u32 vmx_read_guest_seg_limit(struct vcpu_vmx *vmx, unsigned seg)
 734 {
 735         u32 *p = &vmx->segment_cache.seg[seg].limit;
 736 
 737         if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_LIMIT))
 738                 *p = vmcs_read32(kvm_vmx_segment_fields[seg].limit);
 739         return *p;
 740 }
 741 
 742 static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned seg)
 743 {
 744         u32 *p = &vmx->segment_cache.seg[seg].ar;
 745 
 746         if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR))
 747                 *p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes);
 748         return *p;
 749 }
 750 
 751 void update_exception_bitmap(struct kvm_vcpu *vcpu)
 752 {
 753         u32 eb;
 754 
 755         eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
 756              (1u << DB_VECTOR) | (1u << AC_VECTOR);
 757         
 758 
 759 
 760 
 761 
 762 
 763         if (enable_vmware_backdoor)
 764                 eb |= (1u << GP_VECTOR);
 765         if ((vcpu->guest_debug &
 766              (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
 767             (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
 768                 eb |= 1u << BP_VECTOR;
 769         if (to_vmx(vcpu)->rmode.vm86_active)
 770                 eb = ~0;
 771         if (enable_ept)
 772                 eb &= ~(1u << PF_VECTOR); 
 773 
 774         
 775 
 776 
 777 
 778 
 779         if (is_guest_mode(vcpu))
 780                 eb |= get_vmcs12(vcpu)->exception_bitmap;
 781 
 782         vmcs_write32(EXCEPTION_BITMAP, eb);
 783 }
 784 
 785 
 786 
 787 
 788 static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
 789 {
 790         unsigned long *msr_bitmap;
 791         int f = sizeof(unsigned long);
 792 
 793         if (!cpu_has_vmx_msr_bitmap())
 794                 return true;
 795 
 796         msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
 797 
 798         if (msr <= 0x1fff) {
 799                 return !!test_bit(msr, msr_bitmap + 0x800 / f);
 800         } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
 801                 msr &= 0x1fff;
 802                 return !!test_bit(msr, msr_bitmap + 0xc00 / f);
 803         }
 804 
 805         return true;
 806 }
 807 
 808 static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
 809                 unsigned long entry, unsigned long exit)
 810 {
 811         vm_entry_controls_clearbit(vmx, entry);
 812         vm_exit_controls_clearbit(vmx, exit);
 813 }
 814 
 815 static int find_msr(struct vmx_msrs *m, unsigned int msr)
 816 {
 817         unsigned int i;
 818 
 819         for (i = 0; i < m->nr; ++i) {
 820                 if (m->val[i].index == msr)
 821                         return i;
 822         }
 823         return -ENOENT;
 824 }
 825 
 826 static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
 827 {
 828         int i;
 829         struct msr_autoload *m = &vmx->msr_autoload;
 830 
 831         switch (msr) {
 832         case MSR_EFER:
 833                 if (cpu_has_load_ia32_efer()) {
 834                         clear_atomic_switch_msr_special(vmx,
 835                                         VM_ENTRY_LOAD_IA32_EFER,
 836                                         VM_EXIT_LOAD_IA32_EFER);
 837                         return;
 838                 }
 839                 break;
 840         case MSR_CORE_PERF_GLOBAL_CTRL:
 841                 if (cpu_has_load_perf_global_ctrl()) {
 842                         clear_atomic_switch_msr_special(vmx,
 843                                         VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
 844                                         VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
 845                         return;
 846                 }
 847                 break;
 848         }
 849         i = find_msr(&m->guest, msr);
 850         if (i < 0)
 851                 goto skip_guest;
 852         --m->guest.nr;
 853         m->guest.val[i] = m->guest.val[m->guest.nr];
 854         vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
 855 
 856 skip_guest:
 857         i = find_msr(&m->host, msr);
 858         if (i < 0)
 859                 return;
 860 
 861         --m->host.nr;
 862         m->host.val[i] = m->host.val[m->host.nr];
 863         vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
 864 }
 865 
 866 static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
 867                 unsigned long entry, unsigned long exit,
 868                 unsigned long guest_val_vmcs, unsigned long host_val_vmcs,
 869                 u64 guest_val, u64 host_val)
 870 {
 871         vmcs_write64(guest_val_vmcs, guest_val);
 872         if (host_val_vmcs != HOST_IA32_EFER)
 873                 vmcs_write64(host_val_vmcs, host_val);
 874         vm_entry_controls_setbit(vmx, entry);
 875         vm_exit_controls_setbit(vmx, exit);
 876 }
 877 
 878 static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
 879                                   u64 guest_val, u64 host_val, bool entry_only)
 880 {
 881         int i, j = 0;
 882         struct msr_autoload *m = &vmx->msr_autoload;
 883 
 884         switch (msr) {
 885         case MSR_EFER:
 886                 if (cpu_has_load_ia32_efer()) {
 887                         add_atomic_switch_msr_special(vmx,
 888                                         VM_ENTRY_LOAD_IA32_EFER,
 889                                         VM_EXIT_LOAD_IA32_EFER,
 890                                         GUEST_IA32_EFER,
 891                                         HOST_IA32_EFER,
 892                                         guest_val, host_val);
 893                         return;
 894                 }
 895                 break;
 896         case MSR_CORE_PERF_GLOBAL_CTRL:
 897                 if (cpu_has_load_perf_global_ctrl()) {
 898                         add_atomic_switch_msr_special(vmx,
 899                                         VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
 900                                         VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
 901                                         GUEST_IA32_PERF_GLOBAL_CTRL,
 902                                         HOST_IA32_PERF_GLOBAL_CTRL,
 903                                         guest_val, host_val);
 904                         return;
 905                 }
 906                 break;
 907         case MSR_IA32_PEBS_ENABLE:
 908                 
 909 
 910 
 911 
 912 
 913                 wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
 914         }
 915 
 916         i = find_msr(&m->guest, msr);
 917         if (!entry_only)
 918                 j = find_msr(&m->host, msr);
 919 
 920         if ((i < 0 && m->guest.nr == NR_AUTOLOAD_MSRS) ||
 921                 (j < 0 &&  m->host.nr == NR_AUTOLOAD_MSRS)) {
 922                 printk_once(KERN_WARNING "Not enough msr switch entries. "
 923                                 "Can't add msr %x\n", msr);
 924                 return;
 925         }
 926         if (i < 0) {
 927                 i = m->guest.nr++;
 928                 vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->guest.nr);
 929         }
 930         m->guest.val[i].index = msr;
 931         m->guest.val[i].value = guest_val;
 932 
 933         if (entry_only)
 934                 return;
 935 
 936         if (j < 0) {
 937                 j = m->host.nr++;
 938                 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
 939         }
 940         m->host.val[j].index = msr;
 941         m->host.val[j].value = host_val;
 942 }
 943 
 944 static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 945 {
 946         u64 guest_efer = vmx->vcpu.arch.efer;
 947         u64 ignore_bits = 0;
 948 
 949         
 950         if (!enable_ept)
 951                 guest_efer |= EFER_NX;
 952 
 953         
 954 
 955 
 956         ignore_bits |= EFER_SCE;
 957 #ifdef CONFIG_X86_64
 958         ignore_bits |= EFER_LMA | EFER_LME;
 959         
 960         if (guest_efer & EFER_LMA)
 961                 ignore_bits &= ~(u64)EFER_SCE;
 962 #endif
 963 
 964         
 965 
 966 
 967 
 968 
 969         if (cpu_has_load_ia32_efer() ||
 970             (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
 971                 if (!(guest_efer & EFER_LMA))
 972                         guest_efer &= ~EFER_LME;
 973                 if (guest_efer != host_efer)
 974                         add_atomic_switch_msr(vmx, MSR_EFER,
 975                                               guest_efer, host_efer, false);
 976                 else
 977                         clear_atomic_switch_msr(vmx, MSR_EFER);
 978                 return false;
 979         } else {
 980                 clear_atomic_switch_msr(vmx, MSR_EFER);
 981 
 982                 guest_efer &= ~ignore_bits;
 983                 guest_efer |= host_efer & ignore_bits;
 984 
 985                 vmx->guest_msrs[efer_offset].data = guest_efer;
 986                 vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
 987 
 988                 return true;
 989         }
 990 }
 991 
 992 #ifdef CONFIG_X86_32
 993 
 994 
 995 
 996 
 997 
 998 static unsigned long segment_base(u16 selector)
 999 {
1000         struct desc_struct *table;
1001         unsigned long v;
1002 
1003         if (!(selector & ~SEGMENT_RPL_MASK))
1004                 return 0;
1005 
1006         table = get_current_gdt_ro();
1007 
1008         if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) {
1009                 u16 ldt_selector = kvm_read_ldt();
1010 
1011                 if (!(ldt_selector & ~SEGMENT_RPL_MASK))
1012                         return 0;
1013 
1014                 table = (struct desc_struct *)segment_base(ldt_selector);
1015         }
1016         v = get_desc_base(&table[selector >> 3]);
1017         return v;
1018 }
1019 #endif
1020 
1021 static inline void pt_load_msr(struct pt_ctx *ctx, u32 addr_range)
1022 {
1023         u32 i;
1024 
1025         wrmsrl(MSR_IA32_RTIT_STATUS, ctx->status);
1026         wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base);
1027         wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask);
1028         wrmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match);
1029         for (i = 0; i < addr_range; i++) {
1030                 wrmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]);
1031                 wrmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]);
1032         }
1033 }
1034 
1035 static inline void pt_save_msr(struct pt_ctx *ctx, u32 addr_range)
1036 {
1037         u32 i;
1038 
1039         rdmsrl(MSR_IA32_RTIT_STATUS, ctx->status);
1040         rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, ctx->output_base);
1041         rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, ctx->output_mask);
1042         rdmsrl(MSR_IA32_RTIT_CR3_MATCH, ctx->cr3_match);
1043         for (i = 0; i < addr_range; i++) {
1044                 rdmsrl(MSR_IA32_RTIT_ADDR0_A + i * 2, ctx->addr_a[i]);
1045                 rdmsrl(MSR_IA32_RTIT_ADDR0_B + i * 2, ctx->addr_b[i]);
1046         }
1047 }
1048 
1049 static void pt_guest_enter(struct vcpu_vmx *vmx)
1050 {
1051         if (pt_mode == PT_MODE_SYSTEM)
1052                 return;
1053 
1054         
1055 
1056 
1057 
1058         rdmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
1059         if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) {
1060                 wrmsrl(MSR_IA32_RTIT_CTL, 0);
1061                 pt_save_msr(&vmx->pt_desc.host, vmx->pt_desc.addr_range);
1062                 pt_load_msr(&vmx->pt_desc.guest, vmx->pt_desc.addr_range);
1063         }
1064 }
1065 
1066 static void pt_guest_exit(struct vcpu_vmx *vmx)
1067 {
1068         if (pt_mode == PT_MODE_SYSTEM)
1069                 return;
1070 
1071         if (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) {
1072                 pt_save_msr(&vmx->pt_desc.guest, vmx->pt_desc.addr_range);
1073                 pt_load_msr(&vmx->pt_desc.host, vmx->pt_desc.addr_range);
1074         }
1075 
1076         
1077         wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
1078 }
1079 
1080 void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
1081                         unsigned long fs_base, unsigned long gs_base)
1082 {
1083         if (unlikely(fs_sel != host->fs_sel)) {
1084                 if (!(fs_sel & 7))
1085                         vmcs_write16(HOST_FS_SELECTOR, fs_sel);
1086                 else
1087                         vmcs_write16(HOST_FS_SELECTOR, 0);
1088                 host->fs_sel = fs_sel;
1089         }
1090         if (unlikely(gs_sel != host->gs_sel)) {
1091                 if (!(gs_sel & 7))
1092                         vmcs_write16(HOST_GS_SELECTOR, gs_sel);
1093                 else
1094                         vmcs_write16(HOST_GS_SELECTOR, 0);
1095                 host->gs_sel = gs_sel;
1096         }
1097         if (unlikely(fs_base != host->fs_base)) {
1098                 vmcs_writel(HOST_FS_BASE, fs_base);
1099                 host->fs_base = fs_base;
1100         }
1101         if (unlikely(gs_base != host->gs_base)) {
1102                 vmcs_writel(HOST_GS_BASE, gs_base);
1103                 host->gs_base = gs_base;
1104         }
1105 }
1106 
1107 void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
1108 {
1109         struct vcpu_vmx *vmx = to_vmx(vcpu);
1110         struct vmcs_host_state *host_state;
1111 #ifdef CONFIG_X86_64
1112         int cpu = raw_smp_processor_id();
1113 #endif
1114         unsigned long fs_base, gs_base;
1115         u16 fs_sel, gs_sel;
1116         int i;
1117 
1118         vmx->req_immediate_exit = false;
1119 
1120         
1121 
1122 
1123 
1124 
1125         if (!vmx->guest_msrs_ready) {
1126                 vmx->guest_msrs_ready = true;
1127                 for (i = 0; i < vmx->save_nmsrs; ++i)
1128                         kvm_set_shared_msr(vmx->guest_msrs[i].index,
1129                                            vmx->guest_msrs[i].data,
1130                                            vmx->guest_msrs[i].mask);
1131 
1132         }
1133         if (vmx->guest_state_loaded)
1134                 return;
1135 
1136         host_state = &vmx->loaded_vmcs->host_state;
1137 
1138         
1139 
1140 
1141 
1142         host_state->ldt_sel = kvm_read_ldt();
1143 
1144 #ifdef CONFIG_X86_64
1145         savesegment(ds, host_state->ds_sel);
1146         savesegment(es, host_state->es_sel);
1147 
1148         gs_base = cpu_kernelmode_gs_base(cpu);
1149         if (likely(is_64bit_mm(current->mm))) {
1150                 save_fsgs_for_kvm();
1151                 fs_sel = current->thread.fsindex;
1152                 gs_sel = current->thread.gsindex;
1153                 fs_base = current->thread.fsbase;
1154                 vmx->msr_host_kernel_gs_base = current->thread.gsbase;
1155         } else {
1156                 savesegment(fs, fs_sel);
1157                 savesegment(gs, gs_sel);
1158                 fs_base = read_msr(MSR_FS_BASE);
1159                 vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);
1160         }
1161 
1162         wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
1163 #else
1164         savesegment(fs, fs_sel);
1165         savesegment(gs, gs_sel);
1166         fs_base = segment_base(fs_sel);
1167         gs_base = segment_base(gs_sel);
1168 #endif
1169 
1170         vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base);
1171         vmx->guest_state_loaded = true;
1172 }
1173 
1174 static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
1175 {
1176         struct vmcs_host_state *host_state;
1177 
1178         if (!vmx->guest_state_loaded)
1179                 return;
1180 
1181         host_state = &vmx->loaded_vmcs->host_state;
1182 
1183         ++vmx->vcpu.stat.host_state_reload;
1184 
1185 #ifdef CONFIG_X86_64
1186         rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
1187 #endif
1188         if (host_state->ldt_sel || (host_state->gs_sel & 7)) {
1189                 kvm_load_ldt(host_state->ldt_sel);
1190 #ifdef CONFIG_X86_64
1191                 load_gs_index(host_state->gs_sel);
1192 #else
1193                 loadsegment(gs, host_state->gs_sel);
1194 #endif
1195         }
1196         if (host_state->fs_sel & 7)
1197                 loadsegment(fs, host_state->fs_sel);
1198 #ifdef CONFIG_X86_64
1199         if (unlikely(host_state->ds_sel | host_state->es_sel)) {
1200                 loadsegment(ds, host_state->ds_sel);
1201                 loadsegment(es, host_state->es_sel);
1202         }
1203 #endif
1204         invalidate_tss_limit();
1205 #ifdef CONFIG_X86_64
1206         wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
1207 #endif
1208         load_fixmap_gdt(raw_smp_processor_id());
1209         vmx->guest_state_loaded = false;
1210         vmx->guest_msrs_ready = false;
1211 }
1212 
1213 #ifdef CONFIG_X86_64
1214 static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
1215 {
1216         preempt_disable();
1217         if (vmx->guest_state_loaded)
1218                 rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
1219         preempt_enable();
1220         return vmx->msr_guest_kernel_gs_base;
1221 }
1222 
1223 static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
1224 {
1225         preempt_disable();
1226         if (vmx->guest_state_loaded)
1227                 wrmsrl(MSR_KERNEL_GS_BASE, data);
1228         preempt_enable();
1229         vmx->msr_guest_kernel_gs_base = data;
1230 }
1231 #endif
1232 
1233 static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
1234 {
1235         struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
1236         struct pi_desc old, new;
1237         unsigned int dest;
1238 
1239         
1240 
1241 
1242 
1243 
1244 
1245         if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
1246                 return;
1247 
1248         
1249 
1250 
1251 
1252 
1253 
1254 
1255         if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) {
1256                 pi_clear_sn(pi_desc);
1257                 goto after_clear_sn;
1258         }
1259 
1260         
1261         do {
1262                 old.control = new.control = pi_desc->control;
1263 
1264                 dest = cpu_physical_id(cpu);
1265 
1266                 if (x2apic_enabled())
1267                         new.ndst = dest;
1268                 else
1269                         new.ndst = (dest << 8) & 0xFF00;
1270 
1271                 new.sn = 0;
1272         } while (cmpxchg64(&pi_desc->control, old.control,
1273                            new.control) != old.control);
1274 
1275 after_clear_sn:
1276 
1277         
1278 
1279 
1280 
1281 
1282 
1283         smp_mb__after_atomic();
1284 
1285         if (!pi_is_pir_empty(pi_desc))
1286                 pi_set_on(pi_desc);
1287 }
1288 
1289 void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
1290                         struct loaded_vmcs *buddy)
1291 {
1292         struct vcpu_vmx *vmx = to_vmx(vcpu);
1293         bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
1294         struct vmcs *prev;
1295 
1296         if (!already_loaded) {
1297                 loaded_vmcs_clear(vmx->loaded_vmcs);
1298                 local_irq_disable();
1299 
1300                 
1301 
1302 
1303 
1304 
1305 
1306                 smp_rmb();
1307 
1308                 list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
1309                          &per_cpu(loaded_vmcss_on_cpu, cpu));
1310                 local_irq_enable();
1311         }
1312 
1313         prev = per_cpu(current_vmcs, cpu);
1314         if (prev != vmx->loaded_vmcs->vmcs) {
1315                 per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
1316                 vmcs_load(vmx->loaded_vmcs->vmcs);
1317 
1318                 
1319 
1320 
1321 
1322 
1323                 if (!buddy || WARN_ON_ONCE(buddy->vmcs != prev))
1324                         indirect_branch_prediction_barrier();
1325         }
1326 
1327         if (!already_loaded) {
1328                 void *gdt = get_current_gdt_ro();
1329                 unsigned long sysenter_esp;
1330 
1331                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
1332 
1333                 
1334 
1335 
1336 
1337                 vmcs_writel(HOST_TR_BASE,
1338                             (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
1339                 vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt);   
1340 
1341                 
1342 
1343 
1344 
1345 
1346 
1347                 BUILD_BUG_ON(IO_BITMAP_OFFSET - 1 != 0x67);
1348 
1349                 rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
1350                 vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); 
1351 
1352                 vmx->loaded_vmcs->cpu = cpu;
1353         }
1354 
1355         
1356         if (kvm_has_tsc_control &&
1357             vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
1358                 decache_tsc_multiplier(vmx);
1359 }
1360 
1361 
1362 
1363 
1364 
1365 void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1366 {
1367         struct vcpu_vmx *vmx = to_vmx(vcpu);
1368 
1369         vmx_vcpu_load_vmcs(vcpu, cpu, NULL);
1370 
1371         vmx_vcpu_pi_load(vcpu, cpu);
1372 
1373         vmx->host_debugctlmsr = get_debugctlmsr();
1374 }
1375 
1376 static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
1377 {
1378         struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
1379 
1380         if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
1381                 !irq_remapping_cap(IRQ_POSTING_CAP)  ||
1382                 !kvm_vcpu_apicv_active(vcpu))
1383                 return;
1384 
1385         
1386         if (vcpu->preempted)
1387                 pi_set_sn(pi_desc);
1388 }
1389 
1390 static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
1391 {
1392         vmx_vcpu_pi_put(vcpu);
1393 
1394         vmx_prepare_switch_to_host(to_vmx(vcpu));
1395 }
1396 
1397 static bool emulation_required(struct kvm_vcpu *vcpu)
1398 {
1399         return emulate_invalid_guest_state && !guest_state_valid(vcpu);
1400 }
1401 
1402 static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
1403 
1404 unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
1405 {
1406         unsigned long rflags, save_rflags;
1407 
1408         if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail)) {
1409                 __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
1410                 rflags = vmcs_readl(GUEST_RFLAGS);
1411                 if (to_vmx(vcpu)->rmode.vm86_active) {
1412                         rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
1413                         save_rflags = to_vmx(vcpu)->rmode.save_rflags;
1414                         rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
1415                 }
1416                 to_vmx(vcpu)->rflags = rflags;
1417         }
1418         return to_vmx(vcpu)->rflags;
1419 }
1420 
1421 void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
1422 {
1423         unsigned long old_rflags = vmx_get_rflags(vcpu);
1424 
1425         __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail);
1426         to_vmx(vcpu)->rflags = rflags;
1427         if (to_vmx(vcpu)->rmode.vm86_active) {
1428                 to_vmx(vcpu)->rmode.save_rflags = rflags;
1429                 rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
1430         }
1431         vmcs_writel(GUEST_RFLAGS, rflags);
1432 
1433         if ((old_rflags ^ to_vmx(vcpu)->rflags) & X86_EFLAGS_VM)
1434                 to_vmx(vcpu)->emulation_required = emulation_required(vcpu);
1435 }
1436 
1437 u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
1438 {
1439         u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
1440         int ret = 0;
1441 
1442         if (interruptibility & GUEST_INTR_STATE_STI)
1443                 ret |= KVM_X86_SHADOW_INT_STI;
1444         if (interruptibility & GUEST_INTR_STATE_MOV_SS)
1445                 ret |= KVM_X86_SHADOW_INT_MOV_SS;
1446 
1447         return ret;
1448 }
1449 
1450 void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
1451 {
1452         u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
1453         u32 interruptibility = interruptibility_old;
1454 
1455         interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS);
1456 
1457         if (mask & KVM_X86_SHADOW_INT_MOV_SS)
1458                 interruptibility |= GUEST_INTR_STATE_MOV_SS;
1459         else if (mask & KVM_X86_SHADOW_INT_STI)
1460                 interruptibility |= GUEST_INTR_STATE_STI;
1461 
1462         if ((interruptibility != interruptibility_old))
1463                 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility);
1464 }
1465 
1466 static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
1467 {
1468         struct vcpu_vmx *vmx = to_vmx(vcpu);
1469         unsigned long value;
1470 
1471         
1472 
1473 
1474 
1475         if (data & vmx->pt_desc.ctl_bitmask)
1476                 return 1;
1477 
1478         
1479 
1480 
1481 
1482         if ((vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) &&
1483                 ((vmx->pt_desc.guest.ctl ^ data) & ~RTIT_CTL_TRACEEN))
1484                 return 1;
1485 
1486         
1487 
1488 
1489 
1490 
1491         if ((data & RTIT_CTL_TRACEEN) && !(data & RTIT_CTL_TOPA) &&
1492                 !(data & RTIT_CTL_FABRIC_EN) &&
1493                 !intel_pt_validate_cap(vmx->pt_desc.caps,
1494                                         PT_CAP_single_range_output))
1495                 return 1;
1496 
1497         
1498 
1499 
1500 
1501         value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc_periods);
1502         if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc) &&
1503                         !test_bit((data & RTIT_CTL_MTC_RANGE) >>
1504                         RTIT_CTL_MTC_RANGE_OFFSET, &value))
1505                 return 1;
1506         value = intel_pt_validate_cap(vmx->pt_desc.caps,
1507                                                 PT_CAP_cycle_thresholds);
1508         if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) &&
1509                         !test_bit((data & RTIT_CTL_CYC_THRESH) >>
1510                         RTIT_CTL_CYC_THRESH_OFFSET, &value))
1511                 return 1;
1512         value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_periods);
1513         if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) &&
1514                         !test_bit((data & RTIT_CTL_PSB_FREQ) >>
1515                         RTIT_CTL_PSB_FREQ_OFFSET, &value))
1516                 return 1;
1517 
1518         
1519 
1520 
1521 
1522         value = (data & RTIT_CTL_ADDR0) >> RTIT_CTL_ADDR0_OFFSET;
1523         if ((value && (vmx->pt_desc.addr_range < 1)) || (value > 2))
1524                 return 1;
1525         value = (data & RTIT_CTL_ADDR1) >> RTIT_CTL_ADDR1_OFFSET;
1526         if ((value && (vmx->pt_desc.addr_range < 2)) || (value > 2))
1527                 return 1;
1528         value = (data & RTIT_CTL_ADDR2) >> RTIT_CTL_ADDR2_OFFSET;
1529         if ((value && (vmx->pt_desc.addr_range < 3)) || (value > 2))
1530                 return 1;
1531         value = (data & RTIT_CTL_ADDR3) >> RTIT_CTL_ADDR3_OFFSET;
1532         if ((value && (vmx->pt_desc.addr_range < 4)) || (value > 2))
1533                 return 1;
1534 
1535         return 0;
1536 }
1537 
1538 static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
1539 {
1540         unsigned long rip;
1541 
1542         
1543 
1544 
1545 
1546 
1547 
1548 
1549 
1550         if (!static_cpu_has(X86_FEATURE_HYPERVISOR) ||
1551             to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) {
1552                 rip = kvm_rip_read(vcpu);
1553                 rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
1554                 kvm_rip_write(vcpu, rip);
1555         } else {
1556                 if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
1557                         return 0;
1558         }
1559 
1560         
1561         vmx_set_interrupt_shadow(vcpu, 0);
1562 
1563         return 1;
1564 }
1565 
1566 static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
1567 {
1568         
1569 
1570 
1571 
1572 
1573 
1574         if (kvm_hlt_in_guest(vcpu->kvm) &&
1575                         vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
1576                 vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
1577 }
1578 
1579 static void vmx_queue_exception(struct kvm_vcpu *vcpu)
1580 {
1581         struct vcpu_vmx *vmx = to_vmx(vcpu);
1582         unsigned nr = vcpu->arch.exception.nr;
1583         bool has_error_code = vcpu->arch.exception.has_error_code;
1584         u32 error_code = vcpu->arch.exception.error_code;
1585         u32 intr_info = nr | INTR_INFO_VALID_MASK;
1586 
1587         kvm_deliver_exception_payload(vcpu);
1588 
1589         if (has_error_code) {
1590                 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
1591                 intr_info |= INTR_INFO_DELIVER_CODE_MASK;
1592         }
1593 
1594         if (vmx->rmode.vm86_active) {
1595                 int inc_eip = 0;
1596                 if (kvm_exception_is_soft(nr))
1597                         inc_eip = vcpu->arch.event_exit_inst_len;
1598                 kvm_inject_realmode_interrupt(vcpu, nr, inc_eip);
1599                 return;
1600         }
1601 
1602         WARN_ON_ONCE(vmx->emulation_required);
1603 
1604         if (kvm_exception_is_soft(nr)) {
1605                 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
1606                              vmx->vcpu.arch.event_exit_inst_len);
1607                 intr_info |= INTR_TYPE_SOFT_EXCEPTION;
1608         } else
1609                 intr_info |= INTR_TYPE_HARD_EXCEPTION;
1610 
1611         vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
1612 
1613         vmx_clear_hlt(vcpu);
1614 }
1615 
1616 static bool vmx_rdtscp_supported(void)
1617 {
1618         return cpu_has_vmx_rdtscp();
1619 }
1620 
1621 static bool vmx_invpcid_supported(void)
1622 {
1623         return cpu_has_vmx_invpcid();
1624 }
1625 
1626 
1627 
1628 
1629 static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
1630 {
1631         struct shared_msr_entry tmp;
1632 
1633         tmp = vmx->guest_msrs[to];
1634         vmx->guest_msrs[to] = vmx->guest_msrs[from];
1635         vmx->guest_msrs[from] = tmp;
1636 }
1637 
1638 
1639 
1640 
1641 
1642 
1643 static void setup_msrs(struct vcpu_vmx *vmx)
1644 {
1645         int save_nmsrs, index;
1646 
1647         save_nmsrs = 0;
1648 #ifdef CONFIG_X86_64
1649         
1650 
1651 
1652 
1653         if (is_long_mode(&vmx->vcpu) && (vmx->vcpu.arch.efer & EFER_SCE)) {
1654                 index = __find_msr_index(vmx, MSR_STAR);
1655                 if (index >= 0)
1656                         move_msr_up(vmx, index, save_nmsrs++);
1657                 index = __find_msr_index(vmx, MSR_LSTAR);
1658                 if (index >= 0)
1659                         move_msr_up(vmx, index, save_nmsrs++);
1660                 index = __find_msr_index(vmx, MSR_SYSCALL_MASK);
1661                 if (index >= 0)
1662                         move_msr_up(vmx, index, save_nmsrs++);
1663         }
1664 #endif
1665         index = __find_msr_index(vmx, MSR_EFER);
1666         if (index >= 0 && update_transition_efer(vmx, index))
1667                 move_msr_up(vmx, index, save_nmsrs++);
1668         index = __find_msr_index(vmx, MSR_TSC_AUX);
1669         if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))
1670                 move_msr_up(vmx, index, save_nmsrs++);
1671 
1672         vmx->save_nmsrs = save_nmsrs;
1673         vmx->guest_msrs_ready = false;
1674 
1675         if (cpu_has_vmx_msr_bitmap())
1676                 vmx_update_msr_bitmap(&vmx->vcpu);
1677 }
1678 
1679 static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
1680 {
1681         struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
1682 
1683         if (is_guest_mode(vcpu) &&
1684             (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING))
1685                 return vcpu->arch.tsc_offset - vmcs12->tsc_offset;
1686 
1687         return vcpu->arch.tsc_offset;
1688 }
1689 
1690 static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
1691 {
1692         struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
1693         u64 g_tsc_offset = 0;
1694 
1695         
1696 
1697 
1698 
1699 
1700 
1701         if (is_guest_mode(vcpu) &&
1702             (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING))
1703                 g_tsc_offset = vmcs12->tsc_offset;
1704 
1705         trace_kvm_write_tsc_offset(vcpu->vcpu_id,
1706                                    vcpu->arch.tsc_offset - g_tsc_offset,
1707                                    offset);
1708         vmcs_write64(TSC_OFFSET, offset + g_tsc_offset);
1709         return offset + g_tsc_offset;
1710 }
1711 
1712 
1713 
1714 
1715 
1716 
1717 
1718 bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
1719 {
1720         return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX);
1721 }
1722 
1723 static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
1724                                                  uint64_t val)
1725 {
1726         uint64_t valid_bits = to_vmx(vcpu)->msr_ia32_feature_control_valid_bits;
1727 
1728         return !(val & ~valid_bits);
1729 }
1730 
1731 static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
1732 {
1733         switch (msr->index) {
1734         case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
1735                 if (!nested)
1736                         return 1;
1737                 return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
1738         default:
1739                 return 1;
1740         }
1741 
1742         return 0;
1743 }
1744 
1745 
1746 
1747 
1748 
1749 
1750 static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
1751 {
1752         struct vcpu_vmx *vmx = to_vmx(vcpu);
1753         struct shared_msr_entry *msr;
1754         u32 index;
1755 
1756         switch (msr_info->index) {
1757 #ifdef CONFIG_X86_64
1758         case MSR_FS_BASE:
1759                 msr_info->data = vmcs_readl(GUEST_FS_BASE);
1760                 break;
1761         case MSR_GS_BASE:
1762                 msr_info->data = vmcs_readl(GUEST_GS_BASE);
1763                 break;
1764         case MSR_KERNEL_GS_BASE:
1765                 msr_info->data = vmx_read_guest_kernel_gs_base(vmx);
1766                 break;
1767 #endif
1768         case MSR_EFER:
1769                 return kvm_get_msr_common(vcpu, msr_info);
1770         case MSR_IA32_UMWAIT_CONTROL:
1771                 if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
1772                         return 1;
1773 
1774                 msr_info->data = vmx->msr_ia32_umwait_control;
1775                 break;
1776         case MSR_IA32_SPEC_CTRL:
1777                 if (!msr_info->host_initiated &&
1778                     !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
1779                         return 1;
1780 
1781                 msr_info->data = to_vmx(vcpu)->spec_ctrl;
1782                 break;
1783         case MSR_IA32_SYSENTER_CS:
1784                 msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
1785                 break;
1786         case MSR_IA32_SYSENTER_EIP:
1787                 msr_info->data = vmcs_readl(GUEST_SYSENTER_EIP);
1788                 break;
1789         case MSR_IA32_SYSENTER_ESP:
1790                 msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
1791                 break;
1792         case MSR_IA32_BNDCFGS:
1793                 if (!kvm_mpx_supported() ||
1794                     (!msr_info->host_initiated &&
1795                      !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
1796                         return 1;
1797                 msr_info->data = vmcs_read64(GUEST_BNDCFGS);
1798                 break;
1799         case MSR_IA32_MCG_EXT_CTL:
1800                 if (!msr_info->host_initiated &&
1801                     !(vmx->msr_ia32_feature_control &
1802                       FEATURE_CONTROL_LMCE))
1803                         return 1;
1804                 msr_info->data = vcpu->arch.mcg_ext_ctl;
1805                 break;
1806         case MSR_IA32_FEATURE_CONTROL:
1807                 msr_info->data = vmx->msr_ia32_feature_control;
1808                 break;
1809         case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
1810                 if (!nested_vmx_allowed(vcpu))
1811                         return 1;
1812                 return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
1813                                        &msr_info->data);
1814         case MSR_IA32_XSS:
1815                 if (!vmx_xsaves_supported() ||
1816                     (!msr_info->host_initiated &&
1817                      !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
1818                        guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))))
1819                         return 1;
1820                 msr_info->data = vcpu->arch.ia32_xss;
1821                 break;
1822         case MSR_IA32_RTIT_CTL:
1823                 if (pt_mode != PT_MODE_HOST_GUEST)
1824                         return 1;
1825                 msr_info->data = vmx->pt_desc.guest.ctl;
1826                 break;
1827         case MSR_IA32_RTIT_STATUS:
1828                 if (pt_mode != PT_MODE_HOST_GUEST)
1829                         return 1;
1830                 msr_info->data = vmx->pt_desc.guest.status;
1831                 break;
1832         case MSR_IA32_RTIT_CR3_MATCH:
1833                 if ((pt_mode != PT_MODE_HOST_GUEST) ||
1834                         !intel_pt_validate_cap(vmx->pt_desc.caps,
1835                                                 PT_CAP_cr3_filtering))
1836                         return 1;
1837                 msr_info->data = vmx->pt_desc.guest.cr3_match;
1838                 break;
1839         case MSR_IA32_RTIT_OUTPUT_BASE:
1840                 if ((pt_mode != PT_MODE_HOST_GUEST) ||
1841                         (!intel_pt_validate_cap(vmx->pt_desc.caps,
1842                                         PT_CAP_topa_output) &&
1843                          !intel_pt_validate_cap(vmx->pt_desc.caps,
1844                                         PT_CAP_single_range_output)))
1845                         return 1;
1846                 msr_info->data = vmx->pt_desc.guest.output_base;
1847                 break;
1848         case MSR_IA32_RTIT_OUTPUT_MASK:
1849                 if ((pt_mode != PT_MODE_HOST_GUEST) ||
1850                         (!intel_pt_validate_cap(vmx->pt_desc.caps,
1851                                         PT_CAP_topa_output) &&
1852                          !intel_pt_validate_cap(vmx->pt_desc.caps,
1853                                         PT_CAP_single_range_output)))
1854                         return 1;
1855                 msr_info->data = vmx->pt_desc.guest.output_mask;
1856                 break;
1857         case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
1858                 index = msr_info->index - MSR_IA32_RTIT_ADDR0_A;
1859                 if ((pt_mode != PT_MODE_HOST_GUEST) ||
1860                         (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps,
1861                                         PT_CAP_num_address_ranges)))
1862                         return 1;
1863                 if (index % 2)
1864                         msr_info->data = vmx->pt_desc.guest.addr_b[index / 2];
1865                 else
1866                         msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
1867                 break;
1868         case MSR_TSC_AUX:
1869                 if (!msr_info->host_initiated &&
1870                     !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
1871                         return 1;
1872                 
1873         default:
1874                 msr = find_msr_entry(vmx, msr_info->index);
1875                 if (msr) {
1876                         msr_info->data = msr->data;
1877                         break;
1878                 }
1879                 return kvm_get_msr_common(vcpu, msr_info);
1880         }
1881 
1882         return 0;
1883 }
1884 
1885 
1886 
1887 
1888 
1889 
1890 static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
1891 {
1892         struct vcpu_vmx *vmx = to_vmx(vcpu);
1893         struct shared_msr_entry *msr;
1894         int ret = 0;
1895         u32 msr_index = msr_info->index;
1896         u64 data = msr_info->data;
1897         u32 index;
1898 
1899         switch (msr_index) {
1900         case MSR_EFER:
1901                 ret = kvm_set_msr_common(vcpu, msr_info);
1902                 break;
1903 #ifdef CONFIG_X86_64
1904         case MSR_FS_BASE:
1905                 vmx_segment_cache_clear(vmx);
1906                 vmcs_writel(GUEST_FS_BASE, data);
1907                 break;
1908         case MSR_GS_BASE:
1909                 vmx_segment_cache_clear(vmx);
1910                 vmcs_writel(GUEST_GS_BASE, data);
1911                 break;
1912         case MSR_KERNEL_GS_BASE:
1913                 vmx_write_guest_kernel_gs_base(vmx, data);
1914                 break;
1915 #endif
1916         case MSR_IA32_SYSENTER_CS:
1917                 if (is_guest_mode(vcpu))
1918                         get_vmcs12(vcpu)->guest_sysenter_cs = data;
1919                 vmcs_write32(GUEST_SYSENTER_CS, data);
1920                 break;
1921         case MSR_IA32_SYSENTER_EIP:
1922                 if (is_guest_mode(vcpu))
1923                         get_vmcs12(vcpu)->guest_sysenter_eip = data;
1924                 vmcs_writel(GUEST_SYSENTER_EIP, data);
1925                 break;
1926         case MSR_IA32_SYSENTER_ESP:
1927                 if (is_guest_mode(vcpu))
1928                         get_vmcs12(vcpu)->guest_sysenter_esp = data;
1929                 vmcs_writel(GUEST_SYSENTER_ESP, data);
1930                 break;
1931         case MSR_IA32_DEBUGCTLMSR:
1932                 if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls &
1933                                                 VM_EXIT_SAVE_DEBUG_CONTROLS)
1934                         get_vmcs12(vcpu)->guest_ia32_debugctl = data;
1935 
1936                 ret = kvm_set_msr_common(vcpu, msr_info);
1937                 break;
1938 
1939         case MSR_IA32_BNDCFGS:
1940                 if (!kvm_mpx_supported() ||
1941                     (!msr_info->host_initiated &&
1942                      !guest_cpuid_has(vcpu, X86_FEATURE_MPX)))
1943                         return 1;
1944                 if (is_noncanonical_address(data & PAGE_MASK, vcpu) ||
1945                     (data & MSR_IA32_BNDCFGS_RSVD))
1946                         return 1;
1947                 vmcs_write64(GUEST_BNDCFGS, data);
1948                 break;
1949         case MSR_IA32_UMWAIT_CONTROL:
1950                 if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
1951                         return 1;
1952 
1953                 
1954                 if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32)))
1955                         return 1;
1956 
1957                 vmx->msr_ia32_umwait_control = data;
1958                 break;
1959         case MSR_IA32_SPEC_CTRL:
1960                 if (!msr_info->host_initiated &&
1961                     !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
1962                         return 1;
1963 
1964                 
1965                 if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
1966                         return 1;
1967 
1968                 vmx->spec_ctrl = data;
1969 
1970                 if (!data)
1971                         break;
1972 
1973                 
1974 
1975 
1976 
1977 
1978 
1979 
1980 
1981 
1982 
1983 
1984 
1985                 vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
1986                                               MSR_IA32_SPEC_CTRL,
1987                                               MSR_TYPE_RW);
1988                 break;
1989         case MSR_IA32_PRED_CMD:
1990                 if (!msr_info->host_initiated &&
1991                     !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
1992                         return 1;
1993 
1994                 if (data & ~PRED_CMD_IBPB)
1995                         return 1;
1996 
1997                 if (!data)
1998                         break;
1999 
2000                 wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
2001 
2002                 
2003 
2004 
2005 
2006 
2007 
2008 
2009 
2010 
2011 
2012 
2013                 vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
2014                                               MSR_TYPE_W);
2015                 break;
2016         case MSR_IA32_CR_PAT:
2017                 if (!kvm_pat_valid(data))
2018                         return 1;
2019 
2020                 if (is_guest_mode(vcpu) &&
2021                     get_vmcs12(vcpu)->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
2022                         get_vmcs12(vcpu)->guest_ia32_pat = data;
2023 
2024                 if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
2025                         vmcs_write64(GUEST_IA32_PAT, data);
2026                         vcpu->arch.pat = data;
2027                         break;
2028                 }
2029                 ret = kvm_set_msr_common(vcpu, msr_info);
2030                 break;
2031         case MSR_IA32_TSC_ADJUST:
2032                 ret = kvm_set_msr_common(vcpu, msr_info);
2033                 break;
2034         case MSR_IA32_MCG_EXT_CTL:
2035                 if ((!msr_info->host_initiated &&
2036                      !(to_vmx(vcpu)->msr_ia32_feature_control &
2037                        FEATURE_CONTROL_LMCE)) ||
2038                     (data & ~MCG_EXT_CTL_LMCE_EN))
2039                         return 1;
2040                 vcpu->arch.mcg_ext_ctl = data;
2041                 break;
2042         case MSR_IA32_FEATURE_CONTROL:
2043                 if (!vmx_feature_control_msr_valid(vcpu, data) ||
2044                     (to_vmx(vcpu)->msr_ia32_feature_control &
2045                      FEATURE_CONTROL_LOCKED && !msr_info->host_initiated))
2046                         return 1;
2047                 vmx->msr_ia32_feature_control = data;
2048                 if (msr_info->host_initiated && data == 0)
2049                         vmx_leave_nested(vcpu);
2050                 break;
2051         case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
2052                 if (!msr_info->host_initiated)
2053                         return 1; 
2054                 if (!nested_vmx_allowed(vcpu))
2055                         return 1;
2056                 return vmx_set_vmx_msr(vcpu, msr_index, data);
2057         case MSR_IA32_XSS:
2058                 if (!vmx_xsaves_supported() ||
2059                     (!msr_info->host_initiated &&
2060                      !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
2061                        guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))))
2062                         return 1;
2063                 
2064 
2065 
2066 
2067                 if (data != 0)
2068                         return 1;
2069                 vcpu->arch.ia32_xss = data;
2070                 if (vcpu->arch.ia32_xss != host_xss)
2071                         add_atomic_switch_msr(vmx, MSR_IA32_XSS,
2072                                 vcpu->arch.ia32_xss, host_xss, false);
2073                 else
2074                         clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
2075                 break;
2076         case MSR_IA32_RTIT_CTL:
2077                 if ((pt_mode != PT_MODE_HOST_GUEST) ||
2078                         vmx_rtit_ctl_check(vcpu, data) ||
2079                         vmx->nested.vmxon)
2080                         return 1;
2081                 vmcs_write64(GUEST_IA32_RTIT_CTL, data);
2082                 vmx->pt_desc.guest.ctl = data;
2083                 pt_update_intercept_for_msr(vmx);
2084                 break;
2085         case MSR_IA32_RTIT_STATUS:
2086                 if ((pt_mode != PT_MODE_HOST_GUEST) ||
2087                         (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
2088                         (data & MSR_IA32_RTIT_STATUS_MASK))
2089                         return 1;
2090                 vmx->pt_desc.guest.status = data;
2091                 break;
2092         case MSR_IA32_RTIT_CR3_MATCH:
2093                 if ((pt_mode != PT_MODE_HOST_GUEST) ||
2094                         (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
2095                         !intel_pt_validate_cap(vmx->pt_desc.caps,
2096                                                 PT_CAP_cr3_filtering))
2097                         return 1;
2098                 vmx->pt_desc.guest.cr3_match = data;
2099                 break;
2100         case MSR_IA32_RTIT_OUTPUT_BASE:
2101                 if ((pt_mode != PT_MODE_HOST_GUEST) ||
2102                         (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
2103                         (!intel_pt_validate_cap(vmx->pt_desc.caps,
2104                                         PT_CAP_topa_output) &&
2105                          !intel_pt_validate_cap(vmx->pt_desc.caps,
2106                                         PT_CAP_single_range_output)) ||
2107                         (data & MSR_IA32_RTIT_OUTPUT_BASE_MASK))
2108                         return 1;
2109                 vmx->pt_desc.guest.output_base = data;
2110                 break;
2111         case MSR_IA32_RTIT_OUTPUT_MASK:
2112                 if ((pt_mode != PT_MODE_HOST_GUEST) ||
2113                         (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
2114                         (!intel_pt_validate_cap(vmx->pt_desc.caps,
2115                                         PT_CAP_topa_output) &&
2116                          !intel_pt_validate_cap(vmx->pt_desc.caps,
2117                                         PT_CAP_single_range_output)))
2118                         return 1;
2119                 vmx->pt_desc.guest.output_mask = data;
2120                 break;
2121         case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
2122                 index = msr_info->index - MSR_IA32_RTIT_ADDR0_A;
2123                 if ((pt_mode != PT_MODE_HOST_GUEST) ||
2124                         (vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
2125                         (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps,
2126                                         PT_CAP_num_address_ranges)))
2127                         return 1;
2128                 if (is_noncanonical_address(data, vcpu))
2129                         return 1;
2130                 if (index % 2)
2131                         vmx->pt_desc.guest.addr_b[index / 2] = data;
2132                 else
2133                         vmx->pt_desc.guest.addr_a[index / 2] = data;
2134                 break;
2135         case MSR_TSC_AUX:
2136                 if (!msr_info->host_initiated &&
2137                     !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
2138                         return 1;
2139                 
2140                 if ((data >> 32) != 0)
2141                         return 1;
2142                 
2143         default:
2144                 msr = find_msr_entry(vmx, msr_index);
2145                 if (msr) {
2146                         u64 old_msr_data = msr->data;
2147                         msr->data = data;
2148                         if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
2149                                 preempt_disable();
2150                                 ret = kvm_set_shared_msr(msr->index, msr->data,
2151                                                          msr->mask);
2152                                 preempt_enable();
2153                                 if (ret)
2154                                         msr->data = old_msr_data;
2155                         }
2156                         break;
2157                 }
2158                 ret = kvm_set_msr_common(vcpu, msr_info);
2159         }
2160 
2161         return ret;
2162 }
2163 
2164 static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
2165 {
2166         __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
2167         switch (reg) {
2168         case VCPU_REGS_RSP:
2169                 vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP);
2170                 break;
2171         case VCPU_REGS_RIP:
2172                 vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP);
2173                 break;
2174         case VCPU_EXREG_PDPTR:
2175                 if (enable_ept)
2176                         ept_save_pdptrs(vcpu);
2177                 break;
2178         default:
2179                 break;
2180         }
2181 }
2182 
2183 static __init int cpu_has_kvm_support(void)
2184 {
2185         return cpu_has_vmx();
2186 }
2187 
2188 static __init int vmx_disabled_by_bios(void)
2189 {
2190         u64 msr;
2191 
2192         rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
2193         if (msr & FEATURE_CONTROL_LOCKED) {
2194                 
2195                 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
2196                         && tboot_enabled())
2197                         return 1;
2198                 
2199                 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
2200                         && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
2201                         && !tboot_enabled()) {
2202                         printk(KERN_WARNING "kvm: disable TXT in the BIOS or "
2203                                 "activate TXT before enabling KVM\n");
2204                         return 1;
2205                 }
2206                 
2207                 if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
2208                         && !tboot_enabled())
2209                         return 1;
2210         }
2211 
2212         return 0;
2213 }
2214 
2215 static void kvm_cpu_vmxon(u64 addr)
2216 {
2217         cr4_set_bits(X86_CR4_VMXE);
2218         intel_pt_handle_vmx(1);
2219 
2220         asm volatile ("vmxon %0" : : "m"(addr));
2221 }
2222 
2223 static int hardware_enable(void)
2224 {
2225         int cpu = raw_smp_processor_id();
2226         u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
2227         u64 old, test_bits;
2228 
2229         if (cr4_read_shadow() & X86_CR4_VMXE)
2230                 return -EBUSY;
2231 
2232         
2233 
2234 
2235 
2236         if (static_branch_unlikely(&enable_evmcs) &&
2237             !hv_get_vp_assist_page(cpu))
2238                 return -EFAULT;
2239 
2240         rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
2241 
2242         test_bits = FEATURE_CONTROL_LOCKED;
2243         test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
2244         if (tboot_enabled())
2245                 test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX;
2246 
2247         if ((old & test_bits) != test_bits) {
2248                 
2249                 wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
2250         }
2251         kvm_cpu_vmxon(phys_addr);
2252         if (enable_ept)
2253                 ept_sync_global();
2254 
2255         return 0;
2256 }
2257 
2258 static void vmclear_local_loaded_vmcss(void)
2259 {
2260         int cpu = raw_smp_processor_id();
2261         struct loaded_vmcs *v, *n;
2262 
2263         list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu),
2264                                  loaded_vmcss_on_cpu_link)
2265                 __loaded_vmcs_clear(v);
2266 }
2267 
2268 
2269 
2270 
2271 
2272 static void kvm_cpu_vmxoff(void)
2273 {
2274         asm volatile (__ex("vmxoff"));
2275 
2276         intel_pt_handle_vmx(0);
2277         cr4_clear_bits(X86_CR4_VMXE);
2278 }
2279 
2280 static void hardware_disable(void)
2281 {
2282         vmclear_local_loaded_vmcss();
2283         kvm_cpu_vmxoff();
2284 }
2285 
2286 static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
2287                                       u32 msr, u32 *result)
2288 {
2289         u32 vmx_msr_low, vmx_msr_high;
2290         u32 ctl = ctl_min | ctl_opt;
2291 
2292         rdmsr(msr, vmx_msr_low, vmx_msr_high);
2293 
2294         ctl &= vmx_msr_high; 
2295         ctl |= vmx_msr_low;  
2296 
2297         
2298         if (ctl_min & ~ctl)
2299                 return -EIO;
2300 
2301         *result = ctl;
2302         return 0;
2303 }
2304 
2305 static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
2306                                     struct vmx_capability *vmx_cap)
2307 {
2308         u32 vmx_msr_low, vmx_msr_high;
2309         u32 min, opt, min2, opt2;
2310         u32 _pin_based_exec_control = 0;
2311         u32 _cpu_based_exec_control = 0;
2312         u32 _cpu_based_2nd_exec_control = 0;
2313         u32 _vmexit_control = 0;
2314         u32 _vmentry_control = 0;
2315 
2316         memset(vmcs_conf, 0, sizeof(*vmcs_conf));
2317         min = CPU_BASED_HLT_EXITING |
2318 #ifdef CONFIG_X86_64
2319               CPU_BASED_CR8_LOAD_EXITING |
2320               CPU_BASED_CR8_STORE_EXITING |
2321 #endif
2322               CPU_BASED_CR3_LOAD_EXITING |
2323               CPU_BASED_CR3_STORE_EXITING |
2324               CPU_BASED_UNCOND_IO_EXITING |
2325               CPU_BASED_MOV_DR_EXITING |
2326               CPU_BASED_USE_TSC_OFFSETING |
2327               CPU_BASED_MWAIT_EXITING |
2328               CPU_BASED_MONITOR_EXITING |
2329               CPU_BASED_INVLPG_EXITING |
2330               CPU_BASED_RDPMC_EXITING;
2331 
2332         opt = CPU_BASED_TPR_SHADOW |
2333               CPU_BASED_USE_MSR_BITMAPS |
2334               CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
2335         if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
2336                                 &_cpu_based_exec_control) < 0)
2337                 return -EIO;
2338 #ifdef CONFIG_X86_64
2339         if ((_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
2340                 _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING &
2341                                            ~CPU_BASED_CR8_STORE_EXITING;
2342 #endif
2343         if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
2344                 min2 = 0;
2345                 opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
2346                         SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
2347                         SECONDARY_EXEC_WBINVD_EXITING |
2348                         SECONDARY_EXEC_ENABLE_VPID |
2349                         SECONDARY_EXEC_ENABLE_EPT |
2350                         SECONDARY_EXEC_UNRESTRICTED_GUEST |
2351                         SECONDARY_EXEC_PAUSE_LOOP_EXITING |
2352                         SECONDARY_EXEC_DESC |
2353                         SECONDARY_EXEC_RDTSCP |
2354                         SECONDARY_EXEC_ENABLE_INVPCID |
2355                         SECONDARY_EXEC_APIC_REGISTER_VIRT |
2356                         SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
2357                         SECONDARY_EXEC_SHADOW_VMCS |
2358                         SECONDARY_EXEC_XSAVES |
2359                         SECONDARY_EXEC_RDSEED_EXITING |
2360                         SECONDARY_EXEC_RDRAND_EXITING |
2361                         SECONDARY_EXEC_ENABLE_PML |
2362                         SECONDARY_EXEC_TSC_SCALING |
2363                         SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
2364                         SECONDARY_EXEC_PT_USE_GPA |
2365                         SECONDARY_EXEC_PT_CONCEAL_VMX |
2366                         SECONDARY_EXEC_ENABLE_VMFUNC |
2367                         SECONDARY_EXEC_ENCLS_EXITING;
2368                 if (adjust_vmx_controls(min2, opt2,
2369                                         MSR_IA32_VMX_PROCBASED_CTLS2,
2370                                         &_cpu_based_2nd_exec_control) < 0)
2371                         return -EIO;
2372         }
2373 #ifndef CONFIG_X86_64
2374         if (!(_cpu_based_2nd_exec_control &
2375                                 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
2376                 _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
2377 #endif
2378 
2379         if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
2380                 _cpu_based_2nd_exec_control &= ~(
2381                                 SECONDARY_EXEC_APIC_REGISTER_VIRT |
2382                                 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
2383                                 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
2384 
2385         rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP,
2386                 &vmx_cap->ept, &vmx_cap->vpid);
2387 
2388         if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
2389                 
2390 
2391                 _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
2392                                              CPU_BASED_CR3_STORE_EXITING |
2393                                              CPU_BASED_INVLPG_EXITING);
2394         } else if (vmx_cap->ept) {
2395                 vmx_cap->ept = 0;
2396                 pr_warn_once("EPT CAP should not exist if not support "
2397                                 "1-setting enable EPT VM-execution control\n");
2398         }
2399         if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_VPID) &&
2400                 vmx_cap->vpid) {
2401                 vmx_cap->vpid = 0;
2402                 pr_warn_once("VPID CAP should not exist if not support "
2403                                 "1-setting enable VPID VM-execution control\n");
2404         }
2405 
2406         min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT;
2407 #ifdef CONFIG_X86_64
2408         min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
2409 #endif
2410         opt = VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
2411               VM_EXIT_LOAD_IA32_PAT |
2412               VM_EXIT_LOAD_IA32_EFER |
2413               VM_EXIT_CLEAR_BNDCFGS |
2414               VM_EXIT_PT_CONCEAL_PIP |
2415               VM_EXIT_CLEAR_IA32_RTIT_CTL;
2416         if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
2417                                 &_vmexit_control) < 0)
2418                 return -EIO;
2419 
2420         min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
2421         opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
2422                  PIN_BASED_VMX_PREEMPTION_TIMER;
2423         if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
2424                                 &_pin_based_exec_control) < 0)
2425                 return -EIO;
2426 
2427         if (cpu_has_broken_vmx_preemption_timer())
2428                 _pin_based_exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
2429         if (!(_cpu_based_2nd_exec_control &
2430                 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY))
2431                 _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
2432 
2433         min = VM_ENTRY_LOAD_DEBUG_CONTROLS;
2434         opt = VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |
2435               VM_ENTRY_LOAD_IA32_PAT |
2436               VM_ENTRY_LOAD_IA32_EFER |
2437               VM_ENTRY_LOAD_BNDCFGS |
2438               VM_ENTRY_PT_CONCEAL_PIP |
2439               VM_ENTRY_LOAD_IA32_RTIT_CTL;
2440         if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
2441                                 &_vmentry_control) < 0)
2442                 return -EIO;
2443 
2444         
2445 
2446 
2447 
2448 
2449 
2450         if (boot_cpu_data.x86 == 0x6) {
2451                 switch (boot_cpu_data.x86_model) {
2452                 case 26: 
2453                 case 30: 
2454                 case 37: 
2455                 case 44: 
2456                 case 46: 
2457                         _vmentry_control &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
2458                         _vmexit_control &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
2459                         pr_warn_once("kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
2460                                         "does not work properly. Using workaround\n");
2461                         break;
2462                 default:
2463                         break;
2464                 }
2465         }
2466 
2467 
2468         rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);
2469 
2470         
2471         if ((vmx_msr_high & 0x1fff) > PAGE_SIZE)
2472                 return -EIO;
2473 
2474 #ifdef CONFIG_X86_64
2475         
2476         if (vmx_msr_high & (1u<<16))
2477                 return -EIO;
2478 #endif
2479 
2480         
2481         if (((vmx_msr_high >> 18) & 15) != 6)
2482                 return -EIO;
2483 
2484         vmcs_conf->size = vmx_msr_high & 0x1fff;
2485         vmcs_conf->order = get_order(vmcs_conf->size);
2486         vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
2487 
2488         vmcs_conf->revision_id = vmx_msr_low;
2489 
2490         vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
2491         vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
2492         vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control;
2493         vmcs_conf->vmexit_ctrl         = _vmexit_control;
2494         vmcs_conf->vmentry_ctrl        = _vmentry_control;
2495 
2496         if (static_branch_unlikely(&enable_evmcs))
2497                 evmcs_sanitize_exec_ctrls(vmcs_conf);
2498 
2499         return 0;
2500 }
2501 
2502 struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
2503 {
2504         int node = cpu_to_node(cpu);
2505         struct page *pages;
2506         struct vmcs *vmcs;
2507 
2508         pages = __alloc_pages_node(node, flags, vmcs_config.order);
2509         if (!pages)
2510                 return NULL;
2511         vmcs = page_address(pages);
2512         memset(vmcs, 0, vmcs_config.size);
2513 
2514         
2515         if (static_branch_unlikely(&enable_evmcs))
2516                 vmcs->hdr.revision_id = KVM_EVMCS_VERSION;
2517         else
2518                 vmcs->hdr.revision_id = vmcs_config.revision_id;
2519 
2520         if (shadow)
2521                 vmcs->hdr.shadow_vmcs = 1;
2522         return vmcs;
2523 }
2524 
2525 void free_vmcs(struct vmcs *vmcs)
2526 {
2527         free_pages((unsigned long)vmcs, vmcs_config.order);
2528 }
2529 
2530 
2531 
2532 
2533 void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
2534 {
2535         if (!loaded_vmcs->vmcs)
2536                 return;
2537         loaded_vmcs_clear(loaded_vmcs);
2538         free_vmcs(loaded_vmcs->vmcs);
2539         loaded_vmcs->vmcs = NULL;
2540         if (loaded_vmcs->msr_bitmap)
2541                 free_page((unsigned long)loaded_vmcs->msr_bitmap);
2542         WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
2543 }
2544 
2545 int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
2546 {
2547         loaded_vmcs->vmcs = alloc_vmcs(false);
2548         if (!loaded_vmcs->vmcs)
2549                 return -ENOMEM;
2550 
2551         loaded_vmcs->shadow_vmcs = NULL;
2552         loaded_vmcs->hv_timer_soft_disabled = false;
2553         loaded_vmcs_init(loaded_vmcs);
2554 
2555         if (cpu_has_vmx_msr_bitmap()) {
2556                 loaded_vmcs->msr_bitmap = (unsigned long *)
2557                                 __get_free_page(GFP_KERNEL_ACCOUNT);
2558                 if (!loaded_vmcs->msr_bitmap)
2559                         goto out_vmcs;
2560                 memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
2561 
2562                 if (IS_ENABLED(CONFIG_HYPERV) &&
2563                     static_branch_unlikely(&enable_evmcs) &&
2564                     (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
2565                         struct hv_enlightened_vmcs *evmcs =
2566                                 (struct hv_enlightened_vmcs *)loaded_vmcs->vmcs;
2567 
2568                         evmcs->hv_enlightenments_control.msr_bitmap = 1;
2569                 }
2570         }
2571 
2572         memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state));
2573         memset(&loaded_vmcs->controls_shadow, 0,
2574                 sizeof(struct vmcs_controls_shadow));
2575 
2576         return 0;
2577 
2578 out_vmcs:
2579         free_loaded_vmcs(loaded_vmcs);
2580         return -ENOMEM;
2581 }
2582 
2583 static void free_kvm_area(void)
2584 {
2585         int cpu;
2586 
2587         for_each_possible_cpu(cpu) {
2588                 free_vmcs(per_cpu(vmxarea, cpu));
2589                 per_cpu(vmxarea, cpu) = NULL;
2590         }
2591 }
2592 
2593 static __init int alloc_kvm_area(void)
2594 {
2595         int cpu;
2596 
2597         for_each_possible_cpu(cpu) {
2598                 struct vmcs *vmcs;
2599 
2600                 vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL);
2601                 if (!vmcs) {
2602                         free_kvm_area();
2603                         return -ENOMEM;
2604                 }
2605 
2606                 
2607 
2608 
2609 
2610 
2611 
2612 
2613 
2614 
2615 
2616                 if (static_branch_unlikely(&enable_evmcs))
2617                         vmcs->hdr.revision_id = vmcs_config.revision_id;
2618 
2619                 per_cpu(vmxarea, cpu) = vmcs;
2620         }
2621         return 0;
2622 }
2623 
2624 static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
2625                 struct kvm_segment *save)
2626 {
2627         if (!emulate_invalid_guest_state) {
2628                 
2629 
2630 
2631 
2632 
2633 
2634 
2635                 if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
2636                         save->selector &= ~SEGMENT_RPL_MASK;
2637                 save->dpl = save->selector & SEGMENT_RPL_MASK;
2638                 save->s = 1;
2639         }
2640         vmx_set_segment(vcpu, save, seg);
2641 }
2642 
2643 static void enter_pmode(struct kvm_vcpu *vcpu)
2644 {
2645         unsigned long flags;
2646         struct vcpu_vmx *vmx = to_vmx(vcpu);
2647 
2648         
2649 
2650 
2651 
2652         vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
2653         vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
2654         vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
2655         vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
2656         vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
2657         vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
2658 
2659         vmx->rmode.vm86_active = 0;
2660 
2661         vmx_segment_cache_clear(vmx);
2662 
2663         vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
2664 
2665         flags = vmcs_readl(GUEST_RFLAGS);
2666         flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
2667         flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
2668         vmcs_writel(GUEST_RFLAGS, flags);
2669 
2670         vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
2671                         (vmcs_readl(CR4_READ_SHADOW) & X86_CR4_VME));
2672 
2673         update_exception_bitmap(vcpu);
2674 
2675         fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
2676         fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
2677         fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
2678         fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
2679         fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
2680         fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
2681 }
2682 
2683 static void fix_rmode_seg(int seg, struct kvm_segment *save)
2684 {
2685         const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
2686         struct kvm_segment var = *save;
2687 
2688         var.dpl = 0x3;
2689         if (seg == VCPU_SREG_CS)
2690                 var.type = 0x3;
2691 
2692         if (!emulate_invalid_guest_state) {
2693                 var.selector = var.base >> 4;
2694                 var.base = var.base & 0xffff0;
2695                 var.limit = 0xffff;
2696                 var.g = 0;
2697                 var.db = 0;
2698                 var.present = 1;
2699                 var.s = 1;
2700                 var.l = 0;
2701                 var.unusable = 0;
2702                 var.type = 0x3;
2703                 var.avl = 0;
2704                 if (save->base & 0xf)
2705                         printk_once(KERN_WARNING "kvm: segment base is not "
2706                                         "paragraph aligned when entering "
2707                                         "protected mode (seg=%d)", seg);
2708         }
2709 
2710         vmcs_write16(sf->selector, var.selector);
2711         vmcs_writel(sf->base, var.base);
2712         vmcs_write32(sf->limit, var.limit);
2713         vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var));
2714 }
2715 
2716 static void enter_rmode(struct kvm_vcpu *vcpu)
2717 {
2718         unsigned long flags;
2719         struct vcpu_vmx *vmx = to_vmx(vcpu);
2720         struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
2721 
2722         vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
2723         vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
2724         vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
2725         vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
2726         vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
2727         vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS);
2728         vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS);
2729 
2730         vmx->rmode.vm86_active = 1;
2731 
2732         
2733 
2734 
2735 
2736         if (!kvm_vmx->tss_addr)
2737                 printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be "
2738                              "called before entering vcpu\n");
2739 
2740         vmx_segment_cache_clear(vmx);
2741 
2742         vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr);
2743         vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
2744         vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
2745 
2746         flags = vmcs_readl(GUEST_RFLAGS);
2747         vmx->rmode.save_rflags = flags;
2748 
2749         flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
2750 
2751         vmcs_writel(GUEST_RFLAGS, flags);
2752         vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
2753         update_exception_bitmap(vcpu);
2754 
2755         fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]);
2756         fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]);
2757         fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
2758         fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
2759         fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
2760         fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
2761 
2762         kvm_mmu_reset_context(vcpu);
2763 }
2764 
2765 void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
2766 {
2767         struct vcpu_vmx *vmx = to_vmx(vcpu);
2768         struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
2769 
2770         if (!msr)
2771                 return;
2772 
2773         vcpu->arch.efer = efer;
2774         if (efer & EFER_LMA) {
2775                 vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
2776                 msr->data = efer;
2777         } else {
2778                 vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
2779 
2780                 msr->data = efer & ~EFER_LME;
2781         }
2782         setup_msrs(vmx);
2783 }
2784 
2785 #ifdef CONFIG_X86_64
2786 
2787 static void enter_lmode(struct kvm_vcpu *vcpu)
2788 {
2789         u32 guest_tr_ar;
2790 
2791         vmx_segment_cache_clear(to_vmx(vcpu));
2792 
2793         guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
2794         if ((guest_tr_ar & VMX_AR_TYPE_MASK) != VMX_AR_TYPE_BUSY_64_TSS) {
2795                 pr_debug_ratelimited("%s: tss fixup for long mode. \n",
2796                                      __func__);
2797                 vmcs_write32(GUEST_TR_AR_BYTES,
2798                              (guest_tr_ar & ~VMX_AR_TYPE_MASK)
2799                              | VMX_AR_TYPE_BUSY_64_TSS);
2800         }
2801         vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA);
2802 }
2803 
2804 static void exit_lmode(struct kvm_vcpu *vcpu)
2805 {
2806         vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
2807         vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA);
2808 }
2809 
2810 #endif
2811 
2812 static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
2813 {
2814         int vpid = to_vmx(vcpu)->vpid;
2815 
2816         if (!vpid_sync_vcpu_addr(vpid, addr))
2817                 vpid_sync_context(vpid);
2818 
2819         
2820 
2821 
2822 
2823 
2824 }
2825 
2826 static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
2827 {
2828         ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
2829 
2830         vcpu->arch.cr0 &= ~cr0_guest_owned_bits;
2831         vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
2832 }
2833 
2834 static void vmx_decache_cr3(struct kvm_vcpu *vcpu)
2835 {
2836         if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu)))
2837                 vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
2838         __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
2839 }
2840 
2841 static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
2842 {
2843         ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
2844 
2845         vcpu->arch.cr4 &= ~cr4_guest_owned_bits;
2846         vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits;
2847 }
2848 
2849 static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
2850 {
2851         struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
2852 
2853         if (!test_bit(VCPU_EXREG_PDPTR,
2854                       (unsigned long *)&vcpu->arch.regs_dirty))
2855                 return;
2856 
2857         if (is_pae_paging(vcpu)) {
2858                 vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]);
2859                 vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]);
2860                 vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]);
2861                 vmcs_write64(GUEST_PDPTR3, mmu->pdptrs[3]);
2862         }
2863 }
2864 
2865 void ept_save_pdptrs(struct kvm_vcpu *vcpu)
2866 {
2867         struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
2868 
2869         if (is_pae_paging(vcpu)) {
2870                 mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
2871                 mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
2872                 mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
2873                 mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
2874         }
2875 
2876         __set_bit(VCPU_EXREG_PDPTR,
2877                   (unsigned long *)&vcpu->arch.regs_avail);
2878         __set_bit(VCPU_EXREG_PDPTR,
2879                   (unsigned long *)&vcpu->arch.regs_dirty);
2880 }
2881 
2882 static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
2883                                         unsigned long cr0,
2884                                         struct kvm_vcpu *vcpu)
2885 {
2886         struct vcpu_vmx *vmx = to_vmx(vcpu);
2887 
2888         if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
2889                 vmx_decache_cr3(vcpu);
2890         if (!(cr0 & X86_CR0_PG)) {
2891                 
2892                 exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
2893                                           CPU_BASED_CR3_STORE_EXITING);
2894                 vcpu->arch.cr0 = cr0;
2895                 vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
2896         } else if (!is_paging(vcpu)) {
2897                 
2898                 exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
2899                                             CPU_BASED_CR3_STORE_EXITING);
2900                 vcpu->arch.cr0 = cr0;
2901                 vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
2902         }
2903 
2904         if (!(cr0 & X86_CR0_WP))
2905                 *hw_cr0 &= ~X86_CR0_WP;
2906 }
2907 
2908 void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
2909 {
2910         struct vcpu_vmx *vmx = to_vmx(vcpu);
2911         unsigned long hw_cr0;
2912 
2913         hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF);
2914         if (enable_unrestricted_guest)
2915                 hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
2916         else {
2917                 hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
2918 
2919                 if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
2920                         enter_pmode(vcpu);
2921 
2922                 if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE))
2923                         enter_rmode(vcpu);
2924         }
2925 
2926 #ifdef CONFIG_X86_64
2927         if (vcpu->arch.efer & EFER_LME) {
2928                 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG))
2929                         enter_lmode(vcpu);
2930                 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG))
2931                         exit_lmode(vcpu);
2932         }
2933 #endif
2934 
2935         if (enable_ept && !enable_unrestricted_guest)
2936                 ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
2937 
2938         vmcs_writel(CR0_READ_SHADOW, cr0);
2939         vmcs_writel(GUEST_CR0, hw_cr0);
2940         vcpu->arch.cr0 = cr0;
2941 
2942         
2943         vmx->emulation_required = emulation_required(vcpu);
2944 }
2945 
2946 static int get_ept_level(struct kvm_vcpu *vcpu)
2947 {
2948         
2949         if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu)))
2950                 return 4;
2951         if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
2952                 return 5;
2953         return 4;
2954 }
2955 
2956 u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa)
2957 {
2958         u64 eptp = VMX_EPTP_MT_WB;
2959 
2960         eptp |= (get_ept_level(vcpu) == 5) ? VMX_EPTP_PWL_5 : VMX_EPTP_PWL_4;
2961 
2962         if (enable_ept_ad_bits &&
2963             (!is_guest_mode(vcpu) || nested_ept_ad_enabled(vcpu)))
2964                 eptp |= VMX_EPTP_AD_ENABLE_BIT;
2965         eptp |= (root_hpa & PAGE_MASK);
2966 
2967         return eptp;
2968 }
2969 
2970 void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
2971 {
2972         struct kvm *kvm = vcpu->kvm;
2973         bool update_guest_cr3 = true;
2974         unsigned long guest_cr3;
2975         u64 eptp;
2976 
2977         guest_cr3 = cr3;
2978         if (enable_ept) {
2979                 eptp = construct_eptp(vcpu, cr3);
2980                 vmcs_write64(EPT_POINTER, eptp);
2981 
2982                 if (kvm_x86_ops->tlb_remote_flush) {
2983                         spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
2984                         to_vmx(vcpu)->ept_pointer = eptp;
2985                         to_kvm_vmx(kvm)->ept_pointers_match
2986                                 = EPT_POINTERS_CHECK;
2987                         spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
2988                 }
2989 
2990                 
2991                 if (is_guest_mode(vcpu))
2992                         update_guest_cr3 = false;
2993                 else if (enable_unrestricted_guest || is_paging(vcpu))
2994                         guest_cr3 = kvm_read_cr3(vcpu);
2995                 else
2996                         guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr;
2997                 ept_load_pdptrs(vcpu);
2998         }
2999 
3000         if (update_guest_cr3)
3001                 vmcs_writel(GUEST_CR3, guest_cr3);
3002 }
3003 
3004 int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
3005 {
3006         struct vcpu_vmx *vmx = to_vmx(vcpu);
3007         
3008 
3009 
3010 
3011 
3012         unsigned long hw_cr4;
3013 
3014         hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
3015         if (enable_unrestricted_guest)
3016                 hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
3017         else if (vmx->rmode.vm86_active)
3018                 hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
3019         else
3020                 hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;
3021 
3022         if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) {
3023                 if (cr4 & X86_CR4_UMIP) {
3024                         secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC);
3025                         hw_cr4 &= ~X86_CR4_UMIP;
3026                 } else if (!is_guest_mode(vcpu) ||
3027                         !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) {
3028                         secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_DESC);
3029                 }
3030         }
3031 
3032         if (cr4 & X86_CR4_VMXE) {
3033                 
3034 
3035 
3036 
3037 
3038 
3039 
3040                 if (!nested_vmx_allowed(vcpu) || is_smm(vcpu))
3041                         return 1;
3042         }
3043 
3044         if (vmx->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
3045                 return 1;
3046 
3047         vcpu->arch.cr4 = cr4;
3048 
3049         if (!enable_unrestricted_guest) {
3050                 if (enable_ept) {
3051                         if (!is_paging(vcpu)) {
3052                                 hw_cr4 &= ~X86_CR4_PAE;
3053                                 hw_cr4 |= X86_CR4_PSE;
3054                         } else if (!(cr4 & X86_CR4_PAE)) {
3055                                 hw_cr4 &= ~X86_CR4_PAE;
3056                         }
3057                 }
3058 
3059                 
3060 
3061 
3062 
3063 
3064 
3065 
3066 
3067 
3068 
3069 
3070                 if (!is_paging(vcpu))
3071                         hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
3072         }
3073 
3074         vmcs_writel(CR4_READ_SHADOW, cr4);
3075         vmcs_writel(GUEST_CR4, hw_cr4);
3076         return 0;
3077 }
3078 
3079 void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
3080 {
3081         struct vcpu_vmx *vmx = to_vmx(vcpu);
3082         u32 ar;
3083 
3084         if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
3085                 *var = vmx->rmode.segs[seg];
3086                 if (seg == VCPU_SREG_TR
3087                     || var->selector == vmx_read_guest_seg_selector(vmx, seg))
3088                         return;
3089                 var->base = vmx_read_guest_seg_base(vmx, seg);
3090                 var->selector = vmx_read_guest_seg_selector(vmx, seg);
3091                 return;
3092         }
3093         var->base = vmx_read_guest_seg_base(vmx, seg);
3094         var->limit = vmx_read_guest_seg_limit(vmx, seg);
3095         var->selector = vmx_read_guest_seg_selector(vmx, seg);
3096         ar = vmx_read_guest_seg_ar(vmx, seg);
3097         var->unusable = (ar >> 16) & 1;
3098         var->type = ar & 15;
3099         var->s = (ar >> 4) & 1;
3100         var->dpl = (ar >> 5) & 3;
3101         
3102 
3103 
3104 
3105 
3106 
3107 
3108         var->present = !var->unusable;
3109         var->avl = (ar >> 12) & 1;
3110         var->l = (ar >> 13) & 1;
3111         var->db = (ar >> 14) & 1;
3112         var->g = (ar >> 15) & 1;
3113 }
3114 
3115 static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
3116 {
3117         struct kvm_segment s;
3118 
3119         if (to_vmx(vcpu)->rmode.vm86_active) {
3120                 vmx_get_segment(vcpu, &s, seg);
3121                 return s.base;
3122         }
3123         return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
3124 }
3125 
3126 int vmx_get_cpl(struct kvm_vcpu *vcpu)
3127 {
3128         struct vcpu_vmx *vmx = to_vmx(vcpu);
3129 
3130         if (unlikely(vmx->rmode.vm86_active))
3131                 return 0;
3132         else {
3133                 int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
3134                 return VMX_AR_DPL(ar);
3135         }
3136 }
3137 
3138 static u32 vmx_segment_access_rights(struct kvm_segment *var)
3139 {
3140         u32 ar;
3141 
3142         if (var->unusable || !var->present)
3143                 ar = 1 << 16;
3144         else {
3145                 ar = var->type & 15;
3146                 ar |= (var->s & 1) << 4;
3147                 ar |= (var->dpl & 3) << 5;
3148                 ar |= (var->present & 1) << 7;
3149                 ar |= (var->avl & 1) << 12;
3150                 ar |= (var->l & 1) << 13;
3151                 ar |= (var->db & 1) << 14;
3152                 ar |= (var->g & 1) << 15;
3153         }
3154 
3155         return ar;
3156 }
3157 
3158 void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg)
3159 {
3160         struct vcpu_vmx *vmx = to_vmx(vcpu);
3161         const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
3162 
3163         vmx_segment_cache_clear(vmx);
3164 
3165         if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) {
3166                 vmx->rmode.segs[seg] = *var;
3167                 if (seg == VCPU_SREG_TR)
3168                         vmcs_write16(sf->selector, var->selector);
3169                 else if (var->s)
3170                         fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
3171                 goto out;
3172         }
3173 
3174         vmcs_writel(sf->base, var->base);
3175         vmcs_write32(sf->limit, var->limit);
3176         vmcs_write16(sf->selector, var->selector);
3177 
3178         
3179 
3180 
3181 
3182 
3183 
3184 
3185 
3186 
3187 
3188 
3189         if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR))
3190                 var->type |= 0x1; 
3191 
3192         vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
3193 
3194 out:
3195         vmx->emulation_required = emulation_required(vcpu);
3196 }
3197 
3198 static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
3199 {
3200         u32 ar = vmx_read_guest_seg_ar(to_vmx(vcpu), VCPU_SREG_CS);
3201 
3202         *db = (ar >> 14) & 1;
3203         *l = (ar >> 13) & 1;
3204 }
3205 
3206 static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
3207 {
3208         dt->size = vmcs_read32(GUEST_IDTR_LIMIT);
3209         dt->address = vmcs_readl(GUEST_IDTR_BASE);
3210 }
3211 
3212 static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
3213 {
3214         vmcs_write32(GUEST_IDTR_LIMIT, dt->size);
3215         vmcs_writel(GUEST_IDTR_BASE, dt->address);
3216 }
3217 
3218 static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
3219 {
3220         dt->size = vmcs_read32(GUEST_GDTR_LIMIT);
3221         dt->address = vmcs_readl(GUEST_GDTR_BASE);
3222 }
3223 
3224 static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
3225 {
3226         vmcs_write32(GUEST_GDTR_LIMIT, dt->size);
3227         vmcs_writel(GUEST_GDTR_BASE, dt->address);
3228 }
3229 
3230 static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
3231 {
3232         struct kvm_segment var;
3233         u32 ar;
3234 
3235         vmx_get_segment(vcpu, &var, seg);
3236         var.dpl = 0x3;
3237         if (seg == VCPU_SREG_CS)
3238                 var.type = 0x3;
3239         ar = vmx_segment_access_rights(&var);
3240 
3241         if (var.base != (var.selector << 4))
3242                 return false;
3243         if (var.limit != 0xffff)
3244                 return false;
3245         if (ar != 0xf3)
3246                 return false;
3247 
3248         return true;
3249 }
3250 
3251 static bool code_segment_valid(struct kvm_vcpu *vcpu)
3252 {
3253         struct kvm_segment cs;
3254         unsigned int cs_rpl;
3255 
3256         vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
3257         cs_rpl = cs.selector & SEGMENT_RPL_MASK;
3258 
3259         if (cs.unusable)
3260                 return false;
3261         if (~cs.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_ACCESSES_MASK))
3262                 return false;
3263         if (!cs.s)
3264                 return false;
3265         if (cs.type & VMX_AR_TYPE_WRITEABLE_MASK) {
3266                 if (cs.dpl > cs_rpl)
3267                         return false;
3268         } else {
3269                 if (cs.dpl != cs_rpl)
3270                         return false;
3271         }
3272         if (!cs.present)
3273                 return false;
3274 
3275         
3276         return true;
3277 }
3278 
3279 static bool stack_segment_valid(struct kvm_vcpu *vcpu)
3280 {
3281         struct kvm_segment ss;
3282         unsigned int ss_rpl;
3283 
3284         vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
3285         ss_rpl = ss.selector & SEGMENT_RPL_MASK;
3286 
3287         if (ss.unusable)
3288                 return true;
3289         if (ss.type != 3 && ss.type != 7)
3290                 return false;
3291         if (!ss.s)
3292                 return false;
3293         if (ss.dpl != ss_rpl) 
3294                 return false;
3295         if (!ss.present)
3296                 return false;
3297 
3298         return true;
3299 }
3300 
3301 static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
3302 {
3303         struct kvm_segment var;
3304         unsigned int rpl;
3305 
3306         vmx_get_segment(vcpu, &var, seg);
3307         rpl = var.selector & SEGMENT_RPL_MASK;
3308 
3309         if (var.unusable)
3310                 return true;
3311         if (!var.s)
3312                 return false;
3313         if (!var.present)
3314                 return false;
3315         if (~var.type & (VMX_AR_TYPE_CODE_MASK|VMX_AR_TYPE_WRITEABLE_MASK)) {
3316                 if (var.dpl < rpl) 
3317                         return false;
3318         }
3319 
3320         
3321 
3322 
3323         return true;
3324 }
3325 
3326 static bool tr_valid(struct kvm_vcpu *vcpu)
3327 {
3328         struct kvm_segment tr;
3329 
3330         vmx_get_segment(vcpu, &tr, VCPU_SREG_TR);
3331 
3332         if (tr.unusable)
3333                 return false;
3334         if (tr.selector & SEGMENT_TI_MASK)      
3335                 return false;
3336         if (tr.type != 3 && tr.type != 11) 
3337                 return false;
3338         if (!tr.present)
3339                 return false;
3340 
3341         return true;
3342 }
3343 
3344 static bool ldtr_valid(struct kvm_vcpu *vcpu)
3345 {
3346         struct kvm_segment ldtr;
3347 
3348         vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR);
3349 
3350         if (ldtr.unusable)
3351                 return true;
3352         if (ldtr.selector & SEGMENT_TI_MASK)    
3353                 return false;
3354         if (ldtr.type != 2)
3355                 return false;
3356         if (!ldtr.present)
3357                 return false;
3358 
3359         return true;
3360 }
3361 
3362 static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
3363 {
3364         struct kvm_segment cs, ss;
3365 
3366         vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
3367         vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
3368 
3369         return ((cs.selector & SEGMENT_RPL_MASK) ==
3370                  (ss.selector & SEGMENT_RPL_MASK));
3371 }
3372 
3373 
3374 
3375 
3376 
3377 
3378 static bool guest_state_valid(struct kvm_vcpu *vcpu)
3379 {
3380         if (enable_unrestricted_guest)
3381                 return true;
3382 
3383         
3384         if (!is_protmode(vcpu) || (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
3385                 if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
3386                         return false;
3387                 if (!rmode_segment_valid(vcpu, VCPU_SREG_SS))
3388                         return false;
3389                 if (!rmode_segment_valid(vcpu, VCPU_SREG_DS))
3390                         return false;
3391                 if (!rmode_segment_valid(vcpu, VCPU_SREG_ES))
3392                         return false;
3393                 if (!rmode_segment_valid(vcpu, VCPU_SREG_FS))
3394                         return false;
3395                 if (!rmode_segment_valid(vcpu, VCPU_SREG_GS))
3396                         return false;
3397         } else {
3398         
3399                 if (!cs_ss_rpl_check(vcpu))
3400                         return false;
3401                 if (!code_segment_valid(vcpu))
3402                         return false;
3403                 if (!stack_segment_valid(vcpu))
3404                         return false;
3405                 if (!data_segment_valid(vcpu, VCPU_SREG_DS))
3406                         return false;
3407                 if (!data_segment_valid(vcpu, VCPU_SREG_ES))
3408                         return false;
3409                 if (!data_segment_valid(vcpu, VCPU_SREG_FS))
3410                         return false;
3411                 if (!data_segment_valid(vcpu, VCPU_SREG_GS))
3412                         return false;
3413                 if (!tr_valid(vcpu))
3414                         return false;
3415                 if (!ldtr_valid(vcpu))
3416                         return false;
3417         }
3418         
3419 
3420 
3421 
3422 
3423         return true;
3424 }
3425 
3426 static int init_rmode_tss(struct kvm *kvm)
3427 {
3428         gfn_t fn;
3429         u16 data = 0;
3430         int idx, r;
3431 
3432         idx = srcu_read_lock(&kvm->srcu);
3433         fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT;
3434         r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
3435         if (r < 0)
3436                 goto out;
3437         data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE;
3438         r = kvm_write_guest_page(kvm, fn++, &data,
3439                         TSS_IOPB_BASE_OFFSET, sizeof(u16));
3440         if (r < 0)
3441                 goto out;
3442         r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE);
3443         if (r < 0)
3444                 goto out;
3445         r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
3446         if (r < 0)
3447                 goto out;
3448         data = ~0;
3449         r = kvm_write_guest_page(kvm, fn, &data,
3450                                  RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1,
3451                                  sizeof(u8));
3452 out:
3453         srcu_read_unlock(&kvm->srcu, idx);
3454         return r;
3455 }
3456 
3457 static int init_rmode_identity_map(struct kvm *kvm)
3458 {
3459         struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
3460         int i, idx, r = 0;
3461         kvm_pfn_t identity_map_pfn;
3462         u32 tmp;
3463 
3464         
3465         mutex_lock(&kvm->slots_lock);
3466 
3467         if (likely(kvm_vmx->ept_identity_pagetable_done))
3468                 goto out2;
3469 
3470         if (!kvm_vmx->ept_identity_map_addr)
3471                 kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
3472         identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT;
3473 
3474         r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
3475                                     kvm_vmx->ept_identity_map_addr, PAGE_SIZE);
3476         if (r < 0)
3477                 goto out2;
3478 
3479         idx = srcu_read_lock(&kvm->srcu);
3480         r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
3481         if (r < 0)
3482                 goto out;
3483         
3484         for (i = 0; i < PT32_ENT_PER_PAGE; i++) {
3485                 tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
3486                         _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
3487                 r = kvm_write_guest_page(kvm, identity_map_pfn,
3488                                 &tmp, i * sizeof(tmp), sizeof(tmp));
3489                 if (r < 0)
3490                         goto out;
3491         }
3492         kvm_vmx->ept_identity_pagetable_done = true;
3493 
3494 out:
3495         srcu_read_unlock(&kvm->srcu, idx);
3496 
3497 out2:
3498         mutex_unlock(&kvm->slots_lock);
3499         return r;
3500 }
3501 
3502 static void seg_setup(int seg)
3503 {
3504         const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
3505         unsigned int ar;
3506 
3507         vmcs_write16(sf->selector, 0);
3508         vmcs_writel(sf->base, 0);
3509         vmcs_write32(sf->limit, 0xffff);
3510         ar = 0x93;
3511         if (seg == VCPU_SREG_CS)
3512                 ar |= 0x08; 
3513 
3514         vmcs_write32(sf->ar_bytes, ar);
3515 }
3516 
3517 static int alloc_apic_access_page(struct kvm *kvm)
3518 {
3519         struct page *page;
3520         int r = 0;
3521 
3522         mutex_lock(&kvm->slots_lock);
3523         if (kvm->arch.apic_access_page_done)
3524                 goto out;
3525         r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
3526                                     APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
3527         if (r)
3528                 goto out;
3529 
3530         page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
3531         if (is_error_page(page)) {
3532                 r = -EFAULT;
3533                 goto out;
3534         }
3535 
3536         
3537 
3538 
3539 
3540         put_page(page);
3541         kvm->arch.apic_access_page_done = true;
3542 out:
3543         mutex_unlock(&kvm->slots_lock);
3544         return r;
3545 }
3546 
3547 int allocate_vpid(void)
3548 {
3549         int vpid;
3550 
3551         if (!enable_vpid)
3552                 return 0;
3553         spin_lock(&vmx_vpid_lock);
3554         vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
3555         if (vpid < VMX_NR_VPIDS)
3556                 __set_bit(vpid, vmx_vpid_bitmap);
3557         else
3558                 vpid = 0;
3559         spin_unlock(&vmx_vpid_lock);
3560         return vpid;
3561 }
3562 
3563 void free_vpid(int vpid)
3564 {
3565         if (!enable_vpid || vpid == 0)
3566                 return;
3567         spin_lock(&vmx_vpid_lock);
3568         __clear_bit(vpid, vmx_vpid_bitmap);
3569         spin_unlock(&vmx_vpid_lock);
3570 }
3571 
3572 static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
3573                                                           u32 msr, int type)
3574 {
3575         int f = sizeof(unsigned long);
3576 
3577         if (!cpu_has_vmx_msr_bitmap())
3578                 return;
3579 
3580         if (static_branch_unlikely(&enable_evmcs))
3581                 evmcs_touch_msr_bitmap();
3582 
3583         
3584 
3585 
3586 
3587 
3588         if (msr <= 0x1fff) {
3589                 if (type & MSR_TYPE_R)
3590                         
3591                         __clear_bit(msr, msr_bitmap + 0x000 / f);
3592 
3593                 if (type & MSR_TYPE_W)
3594                         
3595                         __clear_bit(msr, msr_bitmap + 0x800 / f);
3596 
3597         } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
3598                 msr &= 0x1fff;
3599                 if (type & MSR_TYPE_R)
3600                         
3601                         __clear_bit(msr, msr_bitmap + 0x400 / f);
3602 
3603                 if (type & MSR_TYPE_W)
3604                         
3605                         __clear_bit(msr, msr_bitmap + 0xc00 / f);
3606 
3607         }
3608 }
3609 
3610 static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
3611                                                          u32 msr, int type)
3612 {
3613         int f = sizeof(unsigned long);
3614 
3615         if (!cpu_has_vmx_msr_bitmap())
3616                 return;
3617 
3618         if (static_branch_unlikely(&enable_evmcs))
3619                 evmcs_touch_msr_bitmap();
3620 
3621         
3622 
3623 
3624 
3625 
3626         if (msr <= 0x1fff) {
3627                 if (type & MSR_TYPE_R)
3628                         
3629                         __set_bit(msr, msr_bitmap + 0x000 / f);
3630 
3631                 if (type & MSR_TYPE_W)
3632                         
3633                         __set_bit(msr, msr_bitmap + 0x800 / f);
3634 
3635         } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
3636                 msr &= 0x1fff;
3637                 if (type & MSR_TYPE_R)
3638                         
3639                         __set_bit(msr, msr_bitmap + 0x400 / f);
3640 
3641                 if (type & MSR_TYPE_W)
3642                         
3643                         __set_bit(msr, msr_bitmap + 0xc00 / f);
3644 
3645         }
3646 }
3647 
3648 static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
3649                                                       u32 msr, int type, bool value)
3650 {
3651         if (value)
3652                 vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
3653         else
3654                 vmx_disable_intercept_for_msr(msr_bitmap, msr, type);
3655 }
3656 
3657 static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
3658 {
3659         u8 mode = 0;
3660 
3661         if (cpu_has_secondary_exec_ctrls() &&
3662             (secondary_exec_controls_get(to_vmx(vcpu)) &
3663              SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
3664                 mode |= MSR_BITMAP_MODE_X2APIC;
3665                 if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
3666                         mode |= MSR_BITMAP_MODE_X2APIC_APICV;
3667         }
3668 
3669         return mode;
3670 }
3671 
3672 static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap,
3673                                          u8 mode)
3674 {
3675         int msr;
3676 
3677         for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
3678                 unsigned word = msr / BITS_PER_LONG;
3679                 msr_bitmap[word] = (mode & MSR_BITMAP_MODE_X2APIC_APICV) ? 0 : ~0;
3680                 msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
3681         }
3682 
3683         if (mode & MSR_BITMAP_MODE_X2APIC) {
3684                 
3685 
3686 
3687 
3688                 vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW);
3689                 if (mode & MSR_BITMAP_MODE_X2APIC_APICV) {
3690                         vmx_enable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_TMCCT), MSR_TYPE_R);
3691                         vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_EOI), MSR_TYPE_W);
3692                         vmx_disable_intercept_for_msr(msr_bitmap, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W);
3693                 }
3694         }
3695 }
3696 
3697 void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu)
3698 {
3699         struct vcpu_vmx *vmx = to_vmx(vcpu);
3700         unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
3701         u8 mode = vmx_msr_bitmap_mode(vcpu);
3702         u8 changed = mode ^ vmx->msr_bitmap_mode;
3703 
3704         if (!changed)
3705                 return;
3706 
3707         if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))
3708                 vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);
3709 
3710         vmx->msr_bitmap_mode = mode;
3711 }
3712 
3713 void pt_update_intercept_for_msr(struct vcpu_vmx *vmx)
3714 {
3715         unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
3716         bool flag = !(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN);
3717         u32 i;
3718 
3719         vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_STATUS,
3720                                                         MSR_TYPE_RW, flag);
3721         vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_OUTPUT_BASE,
3722                                                         MSR_TYPE_RW, flag);
3723         vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_OUTPUT_MASK,
3724                                                         MSR_TYPE_RW, flag);
3725         vmx_set_intercept_for_msr(msr_bitmap, MSR_IA32_RTIT_CR3_MATCH,
3726                                                         MSR_TYPE_RW, flag);
3727         for (i = 0; i < vmx->pt_desc.addr_range; i++) {
3728                 vmx_set_intercept_for_msr(msr_bitmap,
3729                         MSR_IA32_RTIT_ADDR0_A + i * 2, MSR_TYPE_RW, flag);
3730                 vmx_set_intercept_for_msr(msr_bitmap,
3731                         MSR_IA32_RTIT_ADDR0_B + i * 2, MSR_TYPE_RW, flag);
3732         }
3733 }
3734 
3735 static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu)
3736 {
3737         return enable_apicv;
3738 }
3739 
3740 static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
3741 {
3742         struct vcpu_vmx *vmx = to_vmx(vcpu);
3743         void *vapic_page;
3744         u32 vppr;
3745         int rvi;
3746 
3747         if (WARN_ON_ONCE(!is_guest_mode(vcpu)) ||
3748                 !nested_cpu_has_vid(get_vmcs12(vcpu)) ||
3749                 WARN_ON_ONCE(!vmx->nested.virtual_apic_map.gfn))
3750                 return false;
3751 
3752         rvi = vmx_get_rvi();
3753 
3754         vapic_page = vmx->nested.virtual_apic_map.hva;
3755         vppr = *((u32 *)(vapic_page + APIC_PROCPRI));
3756 
3757         return ((rvi & 0xf0) > (vppr & 0xf0));
3758 }
3759 
3760 static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
3761                                                      bool nested)
3762 {
3763 #ifdef CONFIG_SMP
3764         int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR;
3765 
3766         if (vcpu->mode == IN_GUEST_MODE) {
3767                 
3768 
3769 
3770 
3771 
3772 
3773 
3774 
3775 
3776 
3777 
3778 
3779 
3780 
3781 
3782 
3783 
3784 
3785 
3786 
3787 
3788 
3789 
3790 
3791 
3792                 apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
3793                 return true;
3794         }
3795 #endif
3796         return false;
3797 }
3798 
3799 static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
3800                                                 int vector)
3801 {
3802         struct vcpu_vmx *vmx = to_vmx(vcpu);
3803 
3804         if (is_guest_mode(vcpu) &&
3805             vector == vmx->nested.posted_intr_nv) {
3806                 
3807 
3808 
3809 
3810                 vmx->nested.pi_pending = true;
3811                 kvm_make_request(KVM_REQ_EVENT, vcpu);
3812                 
3813                 if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true))
3814                         kvm_vcpu_kick(vcpu);
3815                 return 0;
3816         }
3817         return -1;
3818 }
3819 
3820 
3821 
3822 
3823 
3824 
3825 
3826 static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
3827 {
3828         struct vcpu_vmx *vmx = to_vmx(vcpu);
3829         int r;
3830 
3831         r = vmx_deliver_nested_posted_interrupt(vcpu, vector);
3832         if (!r)
3833                 return 0;
3834 
3835         if (!vcpu->arch.apicv_active)
3836                 return -1;
3837 
3838         if (pi_test_and_set_pir(vector, &vmx->pi_desc))
3839                 return 0;
3840 
3841         
3842         if (pi_test_and_set_on(&vmx->pi_desc))
3843                 return 0;
3844 
3845         if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
3846                 kvm_vcpu_kick(vcpu);
3847 
3848         return 0;
3849 }
3850 
3851 
3852 
3853 
3854 
3855 
3856 
3857 void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
3858 {
3859         u32 low32, high32;
3860         unsigned long tmpl;
3861         unsigned long cr0, cr3, cr4;
3862 
3863         cr0 = read_cr0();
3864         WARN_ON(cr0 & X86_CR0_TS);
3865         vmcs_writel(HOST_CR0, cr0);  
3866 
3867         
3868 
3869 
3870 
3871         cr3 = __read_cr3();
3872         vmcs_writel(HOST_CR3, cr3);             
3873         vmx->loaded_vmcs->host_state.cr3 = cr3;
3874 
3875         
3876         cr4 = cr4_read_shadow();
3877         vmcs_writel(HOST_CR4, cr4);                     
3878         vmx->loaded_vmcs->host_state.cr4 = cr4;
3879 
3880         vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  
3881 #ifdef CONFIG_X86_64
3882         
3883 
3884 
3885 
3886 
3887         vmcs_write16(HOST_DS_SELECTOR, 0);
3888         vmcs_write16(HOST_ES_SELECTOR, 0);
3889 #else
3890         vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS);  
3891         vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS);  
3892 #endif
3893         vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS);  
3894         vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8);  
3895 
3896         vmcs_writel(HOST_IDTR_BASE, host_idt_base);   
3897 
3898         vmcs_writel(HOST_RIP, (unsigned long)vmx_vmexit); 
3899 
3900         rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
3901         vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
3902         rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl);
3903         vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl);   
3904 
3905         if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
3906                 rdmsr(MSR_IA32_CR_PAT, low32, high32);
3907                 vmcs_write64(HOST_IA32_PAT, low32 | ((u64) high32 << 32));
3908         }
3909 
3910         if (cpu_has_load_ia32_efer())
3911                 vmcs_write64(HOST_IA32_EFER, host_efer);
3912 }
3913 
3914 void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
3915 {
3916         vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
3917         if (enable_ept)
3918                 vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
3919         if (is_guest_mode(&vmx->vcpu))
3920                 vmx->vcpu.arch.cr4_guest_owned_bits &=
3921                         ~get_vmcs12(&vmx->vcpu)->cr4_guest_host_mask;
3922         vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
3923 }
3924 
3925 u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
3926 {
3927         u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
3928 
3929         if (!kvm_vcpu_apicv_active(&vmx->vcpu))
3930                 pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
3931 
3932         if (!enable_vnmi)
3933                 pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;
3934 
3935         if (!enable_preemption_timer)
3936                 pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
3937 
3938         return pin_based_exec_ctrl;
3939 }
3940 
3941 static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
3942 {
3943         struct vcpu_vmx *vmx = to_vmx(vcpu);
3944 
3945         pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
3946         if (cpu_has_secondary_exec_ctrls()) {
3947                 if (kvm_vcpu_apicv_active(vcpu))
3948                         secondary_exec_controls_setbit(vmx,
3949                                       SECONDARY_EXEC_APIC_REGISTER_VIRT |
3950                                       SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
3951                 else
3952                         secondary_exec_controls_clearbit(vmx,
3953                                         SECONDARY_EXEC_APIC_REGISTER_VIRT |
3954                                         SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
3955         }
3956 
3957         if (cpu_has_vmx_msr_bitmap())
3958                 vmx_update_msr_bitmap(vcpu);
3959 }
3960 
3961 u32 vmx_exec_control(struct vcpu_vmx *vmx)
3962 {
3963         u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
3964 
3965         if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)
3966                 exec_control &= ~CPU_BASED_MOV_DR_EXITING;
3967 
3968         if (!cpu_need_tpr_shadow(&vmx->vcpu)) {
3969                 exec_control &= ~CPU_BASED_TPR_SHADOW;
3970 #ifdef CONFIG_X86_64
3971                 exec_control |= CPU_BASED_CR8_STORE_EXITING |
3972                                 CPU_BASED_CR8_LOAD_EXITING;
3973 #endif
3974         }
3975         if (!enable_ept)
3976                 exec_control |= CPU_BASED_CR3_STORE_EXITING |
3977                                 CPU_BASED_CR3_LOAD_EXITING  |
3978                                 CPU_BASED_INVLPG_EXITING;
3979         if (kvm_mwait_in_guest(vmx->vcpu.kvm))
3980                 exec_control &= ~(CPU_BASED_MWAIT_EXITING |
3981                                 CPU_BASED_MONITOR_EXITING);
3982         if (kvm_hlt_in_guest(vmx->vcpu.kvm))
3983                 exec_control &= ~CPU_BASED_HLT_EXITING;
3984         return exec_control;
3985 }
3986 
3987 
3988 static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
3989 {
3990         struct kvm_vcpu *vcpu = &vmx->vcpu;
3991 
3992         u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl;
3993 
3994         if (pt_mode == PT_MODE_SYSTEM)
3995                 exec_control &= ~(SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX);
3996         if (!cpu_need_virtualize_apic_accesses(vcpu))
3997                 exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
3998         if (vmx->vpid == 0)
3999                 exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
4000         if (!enable_ept) {
4001                 exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
4002                 enable_unrestricted_guest = 0;
4003         }
4004         if (!enable_unrestricted_guest)
4005                 exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
4006         if (kvm_pause_in_guest(vmx->vcpu.kvm))
4007                 exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
4008         if (!kvm_vcpu_apicv_active(vcpu))
4009                 exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
4010                                   SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
4011         exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
4012 
4013         
4014 
4015         exec_control &= ~SECONDARY_EXEC_DESC;
4016 
4017         
4018 
4019 
4020 
4021 
4022         exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
4023 
4024         if (!enable_pml)
4025                 exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
4026 
4027         if (vmx_xsaves_supported()) {
4028                 
4029                 bool xsaves_enabled =
4030                         guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
4031                         guest_cpuid_has(vcpu, X86_FEATURE_XSAVES);
4032 
4033                 if (!xsaves_enabled)
4034                         exec_control &= ~SECONDARY_EXEC_XSAVES;
4035 
4036                 if (nested) {
4037                         if (xsaves_enabled)
4038                                 vmx->nested.msrs.secondary_ctls_high |=
4039                                         SECONDARY_EXEC_XSAVES;
4040                         else
4041                                 vmx->nested.msrs.secondary_ctls_high &=
4042                                         ~SECONDARY_EXEC_XSAVES;
4043                 }
4044         }
4045 
4046         if (vmx_rdtscp_supported()) {
4047                 bool rdtscp_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP);
4048                 if (!rdtscp_enabled)
4049                         exec_control &= ~SECONDARY_EXEC_RDTSCP;
4050 
4051                 if (nested) {
4052                         if (rdtscp_enabled)
4053                                 vmx->nested.msrs.secondary_ctls_high |=
4054                                         SECONDARY_EXEC_RDTSCP;
4055                         else
4056                                 vmx->nested.msrs.secondary_ctls_high &=
4057                                         ~SECONDARY_EXEC_RDTSCP;
4058                 }
4059         }
4060 
4061         if (vmx_invpcid_supported()) {
4062                 
4063                 bool invpcid_enabled =
4064                         guest_cpuid_has(vcpu, X86_FEATURE_INVPCID) &&
4065                         guest_cpuid_has(vcpu, X86_FEATURE_PCID);
4066 
4067                 if (!invpcid_enabled) {
4068                         exec_control &= ~SECONDARY_EXEC_ENABLE_INVPCID;
4069                         guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID);
4070                 }
4071 
4072                 if (nested) {
4073                         if (invpcid_enabled)
4074                                 vmx->nested.msrs.secondary_ctls_high |=
4075                                         SECONDARY_EXEC_ENABLE_INVPCID;
4076                         else
4077                                 vmx->nested.msrs.secondary_ctls_high &=
4078                                         ~SECONDARY_EXEC_ENABLE_INVPCID;
4079                 }
4080         }
4081 
4082         if (vmx_rdrand_supported()) {
4083                 bool rdrand_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDRAND);
4084                 if (rdrand_enabled)
4085                         exec_control &= ~SECONDARY_EXEC_RDRAND_EXITING;
4086 
4087                 if (nested) {
4088                         if (rdrand_enabled)
4089                                 vmx->nested.msrs.secondary_ctls_high |=
4090                                         SECONDARY_EXEC_RDRAND_EXITING;
4091                         else
4092                                 vmx->nested.msrs.secondary_ctls_high &=
4093                                         ~SECONDARY_EXEC_RDRAND_EXITING;
4094                 }
4095         }
4096 
4097         if (vmx_rdseed_supported()) {
4098                 bool rdseed_enabled = guest_cpuid_has(vcpu, X86_FEATURE_RDSEED);
4099                 if (rdseed_enabled)
4100                         exec_control &= ~SECONDARY_EXEC_RDSEED_EXITING;
4101 
4102                 if (nested) {
4103                         if (rdseed_enabled)
4104                                 vmx->nested.msrs.secondary_ctls_high |=
4105                                         SECONDARY_EXEC_RDSEED_EXITING;
4106                         else
4107                                 vmx->nested.msrs.secondary_ctls_high &=
4108                                         ~SECONDARY_EXEC_RDSEED_EXITING;
4109                 }
4110         }
4111 
4112         if (vmx_waitpkg_supported()) {
4113                 bool waitpkg_enabled =
4114                         guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG);
4115 
4116                 if (!waitpkg_enabled)
4117                         exec_control &= ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
4118 
4119                 if (nested) {
4120                         if (waitpkg_enabled)
4121                                 vmx->nested.msrs.secondary_ctls_high |=
4122                                         SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
4123                         else
4124                                 vmx->nested.msrs.secondary_ctls_high &=
4125                                         ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
4126                 }
4127         }
4128 
4129         vmx->secondary_exec_control = exec_control;
4130 }
4131 
4132 static void ept_set_mmio_spte_mask(void)
4133 {
4134         
4135 
4136 
4137 
4138         kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK,
4139                                    VMX_EPT_MISCONFIG_WX_VALUE, 0);
4140 }
4141 
4142 #define VMX_XSS_EXIT_BITMAP 0
4143 
4144 
4145 
4146 
4147 static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
4148 {
4149         int i;
4150 
4151         if (nested)
4152                 nested_vmx_vcpu_setup();
4153 
4154         if (cpu_has_vmx_msr_bitmap())
4155                 vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
4156 
4157         vmcs_write64(VMCS_LINK_POINTER, -1ull); 
4158 
4159         
4160         pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
4161         vmx->hv_deadline_tsc = -1;
4162 
4163         exec_controls_set(vmx, vmx_exec_control(vmx));
4164 
4165         if (cpu_has_secondary_exec_ctrls()) {
4166                 vmx_compute_secondary_exec_control(vmx);
4167                 secondary_exec_controls_set(vmx, vmx->secondary_exec_control);
4168         }
4169 
4170         if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
4171                 vmcs_write64(EOI_EXIT_BITMAP0, 0);
4172                 vmcs_write64(EOI_EXIT_BITMAP1, 0);
4173                 vmcs_write64(EOI_EXIT_BITMAP2, 0);
4174                 vmcs_write64(EOI_EXIT_BITMAP3, 0);
4175 
4176                 vmcs_write16(GUEST_INTR_STATUS, 0);
4177 
4178                 vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
4179                 vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
4180         }
4181 
4182         if (!kvm_pause_in_guest(vmx->vcpu.kvm)) {
4183                 vmcs_write32(PLE_GAP, ple_gap);
4184                 vmx->ple_window = ple_window;
4185                 vmx->ple_window_dirty = true;
4186         }
4187 
4188         vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
4189         vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
4190         vmcs_write32(CR3_TARGET_COUNT, 0);           
4191 
4192         vmcs_write16(HOST_FS_SELECTOR, 0);            
4193         vmcs_write16(HOST_GS_SELECTOR, 0);            
4194         vmx_set_constant_host_state(vmx);
4195         vmcs_writel(HOST_FS_BASE, 0); 
4196         vmcs_writel(HOST_GS_BASE, 0); 
4197 
4198         if (cpu_has_vmx_vmfunc())
4199                 vmcs_write64(VM_FUNCTION_CONTROL, 0);
4200 
4201         vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
4202         vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
4203         vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
4204         vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
4205         vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
4206 
4207         if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
4208                 vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
4209 
4210         for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
4211                 u32 index = vmx_msr_index[i];
4212                 u32 data_low, data_high;
4213                 int j = vmx->nmsrs;
4214 
4215                 if (rdmsr_safe(index, &data_low, &data_high) < 0)
4216                         continue;
4217                 if (wrmsr_safe(index, data_low, data_high) < 0)
4218                         continue;
4219                 vmx->guest_msrs[j].index = i;
4220                 vmx->guest_msrs[j].data = 0;
4221                 vmx->guest_msrs[j].mask = -1ull;
4222                 ++vmx->nmsrs;
4223         }
4224 
4225         vm_exit_controls_set(vmx, vmx_vmexit_ctrl());
4226 
4227         
4228         vm_entry_controls_set(vmx, vmx_vmentry_ctrl());
4229 
4230         vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS;
4231         vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS);
4232 
4233         set_cr4_guest_host_mask(vmx);
4234 
4235         if (vmx_xsaves_supported())
4236                 vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
4237 
4238         if (enable_pml) {
4239                 vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
4240                 vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
4241         }
4242 
4243         if (cpu_has_vmx_encls_vmexit())
4244                 vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
4245 
4246         if (pt_mode == PT_MODE_HOST_GUEST) {
4247                 memset(&vmx->pt_desc, 0, sizeof(vmx->pt_desc));
4248                 
4249                 vmx->pt_desc.guest.output_mask = 0x7F;
4250                 vmcs_write64(GUEST_IA32_RTIT_CTL, 0);
4251         }
4252 }
4253 
4254 static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
4255 {
4256         struct vcpu_vmx *vmx = to_vmx(vcpu);
4257         struct msr_data apic_base_msr;
4258         u64 cr0;
4259 
4260         vmx->rmode.vm86_active = 0;
4261         vmx->spec_ctrl = 0;
4262 
4263         vmx->msr_ia32_umwait_control = 0;
4264 
4265         vcpu->arch.microcode_version = 0x100000000ULL;
4266         vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
4267         vmx->hv_deadline_tsc = -1;
4268         kvm_set_cr8(vcpu, 0);
4269 
4270         if (!init_event) {
4271                 apic_base_msr.data = APIC_DEFAULT_PHYS_BASE |
4272                                      MSR_IA32_APICBASE_ENABLE;
4273                 if (kvm_vcpu_is_reset_bsp(vcpu))
4274                         apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
4275                 apic_base_msr.host_initiated = true;
4276                 kvm_set_apic_base(vcpu, &apic_base_msr);
4277         }
4278 
4279         vmx_segment_cache_clear(vmx);
4280 
4281         seg_setup(VCPU_SREG_CS);
4282         vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
4283         vmcs_writel(GUEST_CS_BASE, 0xffff0000ul);
4284 
4285         seg_setup(VCPU_SREG_DS);
4286         seg_setup(VCPU_SREG_ES);
4287         seg_setup(VCPU_SREG_FS);
4288         seg_setup(VCPU_SREG_GS);
4289         seg_setup(VCPU_SREG_SS);
4290 
4291         vmcs_write16(GUEST_TR_SELECTOR, 0);
4292         vmcs_writel(GUEST_TR_BASE, 0);
4293         vmcs_write32(GUEST_TR_LIMIT, 0xffff);
4294         vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
4295 
4296         vmcs_write16(GUEST_LDTR_SELECTOR, 0);
4297         vmcs_writel(GUEST_LDTR_BASE, 0);
4298         vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
4299         vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
4300 
4301         if (!init_event) {
4302                 vmcs_write32(GUEST_SYSENTER_CS, 0);
4303                 vmcs_writel(GUEST_SYSENTER_ESP, 0);
4304                 vmcs_writel(GUEST_SYSENTER_EIP, 0);
4305                 vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
4306         }
4307 
4308         kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
4309         kvm_rip_write(vcpu, 0xfff0);
4310 
4311         vmcs_writel(GUEST_GDTR_BASE, 0);
4312         vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
4313 
4314         vmcs_writel(GUEST_IDTR_BASE, 0);
4315         vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
4316 
4317         vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
4318         vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
4319         vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);
4320         if (kvm_mpx_supported())
4321                 vmcs_write64(GUEST_BNDCFGS, 0);
4322 
4323         setup_msrs(vmx);
4324 
4325         vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);  
4326 
4327         if (cpu_has_vmx_tpr_shadow() && !init_event) {
4328                 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
4329                 if (cpu_need_tpr_shadow(vcpu))
4330                         vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
4331                                      __pa(vcpu->arch.apic->regs));
4332                 vmcs_write32(TPR_THRESHOLD, 0);
4333         }
4334 
4335         kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
4336 
4337         if (vmx->vpid != 0)
4338                 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
4339 
4340         cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
4341         vmx->vcpu.arch.cr0 = cr0;
4342         vmx_set_cr0(vcpu, cr0); 
4343         vmx_set_cr4(vcpu, 0);
4344         vmx_set_efer(vcpu, 0);
4345 
4346         update_exception_bitmap(vcpu);
4347 
4348         vpid_sync_context(vmx->vpid);
4349         if (init_event)
4350                 vmx_clear_hlt(vcpu);
4351 }
4352 
4353 static void enable_irq_window(struct kvm_vcpu *vcpu)
4354 {
4355         exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING);
4356 }
4357 
4358 static void enable_nmi_window(struct kvm_vcpu *vcpu)
4359 {
4360         if (!enable_vnmi ||
4361             vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
4362                 enable_irq_window(vcpu);
4363                 return;
4364         }
4365 
4366         exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING);
4367 }
4368 
4369 static void vmx_inject_irq(struct kvm_vcpu *vcpu)
4370 {
4371         struct vcpu_vmx *vmx = to_vmx(vcpu);
4372         uint32_t intr;
4373         int irq = vcpu->arch.interrupt.nr;
4374 
4375         trace_kvm_inj_virq(irq);
4376 
4377         ++vcpu->stat.irq_injections;
4378         if (vmx->rmode.vm86_active) {
4379                 int inc_eip = 0;
4380                 if (vcpu->arch.interrupt.soft)
4381                         inc_eip = vcpu->arch.event_exit_inst_len;
4382                 kvm_inject_realmode_interrupt(vcpu, irq, inc_eip);
4383                 return;
4384         }
4385         intr = irq | INTR_INFO_VALID_MASK;
4386         if (vcpu->arch.interrupt.soft) {
4387                 intr |= INTR_TYPE_SOFT_INTR;
4388                 vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
4389                              vmx->vcpu.arch.event_exit_inst_len);
4390         } else
4391                 intr |= INTR_TYPE_EXT_INTR;
4392         vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
4393 
4394         vmx_clear_hlt(vcpu);
4395 }
4396 
4397 static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
4398 {
4399         struct vcpu_vmx *vmx = to_vmx(vcpu);
4400 
4401         if (!enable_vnmi) {
4402                 
4403 
4404 
4405 
4406 
4407 
4408 
4409 
4410                 vmx->loaded_vmcs->soft_vnmi_blocked = 1;
4411                 vmx->loaded_vmcs->vnmi_blocked_time = 0;
4412         }
4413 
4414         ++vcpu->stat.nmi_injections;
4415         vmx->loaded_vmcs->nmi_known_unmasked = false;
4416 
4417         if (vmx->rmode.vm86_active) {
4418                 kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0);
4419                 return;
4420         }
4421 
4422         vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
4423                         INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
4424 
4425         vmx_clear_hlt(vcpu);
4426 }
4427 
4428 bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
4429 {
4430         struct vcpu_vmx *vmx = to_vmx(vcpu);
4431         bool masked;
4432 
4433         if (!enable_vnmi)
4434                 return vmx->loaded_vmcs->soft_vnmi_blocked;
4435         if (vmx->loaded_vmcs->nmi_known_unmasked)
4436                 return false;
4437         masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
4438         vmx->loaded_vmcs->nmi_known_unmasked = !masked;
4439         return masked;
4440 }
4441 
4442 void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
4443 {
4444         struct vcpu_vmx *vmx = to_vmx(vcpu);
4445 
4446         if (!enable_vnmi) {
4447                 if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
4448                         vmx->loaded_vmcs->soft_vnmi_blocked = masked;
4449                         vmx->loaded_vmcs->vnmi_blocked_time = 0;
4450                 }
4451         } else {
4452                 vmx->loaded_vmcs->nmi_known_unmasked = !masked;
4453                 if (masked)
4454                         vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
4455                                       GUEST_INTR_STATE_NMI);
4456                 else
4457                         vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
4458                                         GUEST_INTR_STATE_NMI);
4459         }
4460 }
4461 
4462 static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
4463 {
4464         if (to_vmx(vcpu)->nested.nested_run_pending)
4465                 return 0;
4466 
4467         if (!enable_vnmi &&
4468             to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
4469                 return 0;
4470 
4471         return  !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
4472                   (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
4473                    | GUEST_INTR_STATE_NMI));
4474 }
4475 
4476 static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
4477 {
4478         if (to_vmx(vcpu)->nested.nested_run_pending)
4479                 return false;
4480 
4481         if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
4482                 return true;
4483 
4484         return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
4485                 !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
4486                         (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
4487 }
4488 
4489 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
4490 {
4491         int ret;
4492 
4493         if (enable_unrestricted_guest)
4494                 return 0;
4495 
4496         ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
4497                                     PAGE_SIZE * 3);
4498         if (ret)
4499                 return ret;
4500         to_kvm_vmx(kvm)->tss_addr = addr;
4501         return init_rmode_tss(kvm);
4502 }
4503 
4504 static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
4505 {
4506         to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr;
4507         return 0;
4508 }
4509 
4510 static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
4511 {
4512         switch (vec) {
4513         case BP_VECTOR:
4514                 
4515 
4516 
4517 
4518                 to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
4519                         vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
4520                 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
4521                         return false;
4522                 
4523         case DB_VECTOR:
4524                 if (vcpu->guest_debug &
4525                         (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
4526                         return false;
4527                 
4528         case DE_VECTOR:
4529         case OF_VECTOR:
4530         case BR_VECTOR:
4531         case UD_VECTOR:
4532         case DF_VECTOR:
4533         case SS_VECTOR:
4534         case GP_VECTOR:
4535         case MF_VECTOR:
4536                 return true;
4537         break;
4538         }
4539         return false;
4540 }
4541 
4542 static int handle_rmode_exception(struct kvm_vcpu *vcpu,
4543                                   int vec, u32 err_code)
4544 {
4545         
4546 
4547 
4548 
4549         if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
4550                 if (kvm_emulate_instruction(vcpu, 0)) {
4551                         if (vcpu->arch.halt_request) {
4552                                 vcpu->arch.halt_request = 0;
4553                                 return kvm_vcpu_halt(vcpu);
4554                         }
4555                         return 1;
4556                 }
4557                 return 0;
4558         }
4559 
4560         
4561 
4562 
4563 
4564 
4565         kvm_queue_exception(vcpu, vec);
4566         return 1;
4567 }
4568 
4569 
4570 
4571 
4572 
4573 
4574 
4575 
4576 static void kvm_machine_check(void)
4577 {
4578 #if defined(CONFIG_X86_MCE)
4579         struct pt_regs regs = {
4580                 .cs = 3, 
4581                 .flags = X86_EFLAGS_IF,
4582         };
4583 
4584         do_machine_check(®s, 0);
4585 #endif
4586 }
4587 
4588 static int handle_machine_check(struct kvm_vcpu *vcpu)
4589 {
4590         
4591         return 1;
4592 }
4593 
4594 static int handle_exception_nmi(struct kvm_vcpu *vcpu)
4595 {
4596         struct vcpu_vmx *vmx = to_vmx(vcpu);
4597         struct kvm_run *kvm_run = vcpu->run;
4598         u32 intr_info, ex_no, error_code;
4599         unsigned long cr2, rip, dr6;
4600         u32 vect_info;
4601 
4602         vect_info = vmx->idt_vectoring_info;
4603         intr_info = vmx->exit_intr_info;
4604 
4605         if (is_machine_check(intr_info) || is_nmi(intr_info))
4606                 return 1; 
4607 
4608         if (is_invalid_opcode(intr_info))
4609                 return handle_ud(vcpu);
4610 
4611         error_code = 0;
4612         if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
4613                 error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
4614 
4615         if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
4616                 WARN_ON_ONCE(!enable_vmware_backdoor);
4617 
4618                 
4619 
4620 
4621 
4622 
4623                 if (error_code) {
4624                         kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
4625                         return 1;
4626                 }
4627                 return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
4628         }
4629 
4630         
4631 
4632 
4633 
4634 
4635         if ((vect_info & VECTORING_INFO_VALID_MASK) &&
4636             !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) {
4637                 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
4638                 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
4639                 vcpu->run->internal.ndata = 3;
4640                 vcpu->run->internal.data[0] = vect_info;
4641                 vcpu->run->internal.data[1] = intr_info;
4642                 vcpu->run->internal.data[2] = error_code;
4643                 return 0;
4644         }
4645 
4646         if (is_page_fault(intr_info)) {
4647                 cr2 = vmcs_readl(EXIT_QUALIFICATION);
4648                 
4649                 WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept);
4650                 return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0);
4651         }
4652 
4653         ex_no = intr_info & INTR_INFO_VECTOR_MASK;
4654 
4655         if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no))
4656                 return handle_rmode_exception(vcpu, ex_no, error_code);
4657 
4658         switch (ex_no) {
4659         case AC_VECTOR:
4660                 kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
4661                 return 1;
4662         case DB_VECTOR:
4663                 dr6 = vmcs_readl(EXIT_QUALIFICATION);
4664                 if (!(vcpu->guest_debug &
4665                       (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
4666                         vcpu->arch.dr6 &= ~DR_TRAP_BITS;
4667                         vcpu->arch.dr6 |= dr6 | DR6_RTM;
4668                         if (is_icebp(intr_info))
4669                                 WARN_ON(!skip_emulated_instruction(vcpu));
4670 
4671                         kvm_queue_exception(vcpu, DB_VECTOR);
4672                         return 1;
4673                 }
4674                 kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
4675                 kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
4676                 
4677         case BP_VECTOR:
4678                 
4679 
4680 
4681 
4682 
4683                 vmx->vcpu.arch.event_exit_inst_len =
4684                         vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
4685                 kvm_run->exit_reason = KVM_EXIT_DEBUG;
4686                 rip = kvm_rip_read(vcpu);
4687                 kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
4688                 kvm_run->debug.arch.exception = ex_no;
4689                 break;
4690         default:
4691                 kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
4692                 kvm_run->ex.exception = ex_no;
4693                 kvm_run->ex.error_code = error_code;
4694                 break;
4695         }
4696         return 0;
4697 }
4698 
4699 static int handle_external_interrupt(struct kvm_vcpu *vcpu)
4700 {
4701         ++vcpu->stat.irq_exits;
4702         return 1;
4703 }
4704 
4705 static int handle_triple_fault(struct kvm_vcpu *vcpu)
4706 {
4707         vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
4708         vcpu->mmio_needed = 0;
4709         return 0;
4710 }
4711 
4712 static int handle_io(struct kvm_vcpu *vcpu)
4713 {
4714         unsigned long exit_qualification;
4715         int size, in, string;
4716         unsigned port;
4717 
4718         exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
4719         string = (exit_qualification & 16) != 0;
4720 
4721         ++vcpu->stat.io_exits;
4722 
4723         if (string)
4724                 return kvm_emulate_instruction(vcpu, 0);
4725 
4726         port = exit_qualification >> 16;
4727         size = (exit_qualification & 7) + 1;
4728         in = (exit_qualification & 8) != 0;
4729 
4730         return kvm_fast_pio(vcpu, size, port, in);
4731 }
4732 
4733 static void
4734 vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
4735 {
4736         
4737 
4738 
4739         hypercall[0] = 0x0f;
4740         hypercall[1] = 0x01;
4741         hypercall[2] = 0xc1;
4742 }
4743 
4744 
4745 static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
4746 {
4747         if (is_guest_mode(vcpu)) {
4748                 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4749                 unsigned long orig_val = val;
4750 
4751                 
4752 
4753 
4754 
4755 
4756 
4757 
4758 
4759                 val = (val & ~vmcs12->cr0_guest_host_mask) |
4760                         (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
4761 
4762                 if (!nested_guest_cr0_valid(vcpu, val))
4763                         return 1;
4764 
4765                 if (kvm_set_cr0(vcpu, val))
4766                         return 1;
4767                 vmcs_writel(CR0_READ_SHADOW, orig_val);
4768                 return 0;
4769         } else {
4770                 if (to_vmx(vcpu)->nested.vmxon &&
4771                     !nested_host_cr0_valid(vcpu, val))
4772                         return 1;
4773 
4774                 return kvm_set_cr0(vcpu, val);
4775         }
4776 }
4777 
4778 static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
4779 {
4780         if (is_guest_mode(vcpu)) {
4781                 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
4782                 unsigned long orig_val = val;
4783 
4784                 
4785                 val = (val & ~vmcs12->cr4_guest_host_mask) |
4786                         (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask);
4787                 if (kvm_set_cr4(vcpu, val))
4788                         return 1;
4789                 vmcs_writel(CR4_READ_SHADOW, orig_val);
4790                 return 0;
4791         } else
4792                 return kvm_set_cr4(vcpu, val);
4793 }
4794 
4795 static int handle_desc(struct kvm_vcpu *vcpu)
4796 {
4797         WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
4798         return kvm_emulate_instruction(vcpu, 0);
4799 }
4800 
4801 static int handle_cr(struct kvm_vcpu *vcpu)
4802 {
4803         unsigned long exit_qualification, val;
4804         int cr;
4805         int reg;
4806         int err;
4807         int ret;
4808 
4809         exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
4810         cr = exit_qualification & 15;
4811         reg = (exit_qualification >> 8) & 15;
4812         switch ((exit_qualification >> 4) & 3) {
4813         case 0: 
4814                 val = kvm_register_readl(vcpu, reg);
4815                 trace_kvm_cr_write(cr, val);
4816                 switch (cr) {
4817                 case 0:
4818                         err = handle_set_cr0(vcpu, val);
4819                         return kvm_complete_insn_gp(vcpu, err);
4820                 case 3:
4821                         WARN_ON_ONCE(enable_unrestricted_guest);
4822                         err = kvm_set_cr3(vcpu, val);
4823                         return kvm_complete_insn_gp(vcpu, err);
4824                 case 4:
4825                         err = handle_set_cr4(vcpu, val);
4826                         return kvm_complete_insn_gp(vcpu, err);
4827                 case 8: {
4828                                 u8 cr8_prev = kvm_get_cr8(vcpu);
4829                                 u8 cr8 = (u8)val;
4830                                 err = kvm_set_cr8(vcpu, cr8);
4831                                 ret = kvm_complete_insn_gp(vcpu, err);
4832                                 if (lapic_in_kernel(vcpu))
4833                                         return ret;
4834                                 if (cr8_prev <= cr8)
4835                                         return ret;
4836                                 
4837 
4838 
4839 
4840 
4841                                 vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
4842                                 return 0;
4843                         }
4844                 }
4845                 break;
4846         case 2: 
4847                 WARN_ONCE(1, "Guest should always own CR0.TS");
4848                 vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
4849                 trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
4850                 return kvm_skip_emulated_instruction(vcpu);
4851         case 1: 
4852                 switch (cr) {
4853                 case 3:
4854                         WARN_ON_ONCE(enable_unrestricted_guest);
4855                         val = kvm_read_cr3(vcpu);
4856                         kvm_register_write(vcpu, reg, val);
4857                         trace_kvm_cr_read(cr, val);
4858                         return kvm_skip_emulated_instruction(vcpu);
4859                 case 8:
4860                         val = kvm_get_cr8(vcpu);
4861                         kvm_register_write(vcpu, reg, val);
4862                         trace_kvm_cr_read(cr, val);
4863                         return kvm_skip_emulated_instruction(vcpu);
4864                 }
4865                 break;
4866         case 3: 
4867                 val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
4868                 trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
4869                 kvm_lmsw(vcpu, val);
4870 
4871                 return kvm_skip_emulated_instruction(vcpu);
4872         default:
4873                 break;
4874         }
4875         vcpu->run->exit_reason = 0;
4876         vcpu_unimpl(vcpu, "unhandled control register: op %d cr %d\n",
4877                (int)(exit_qualification >> 4) & 3, cr);
4878         return 0;
4879 }
4880 
4881 static int handle_dr(struct kvm_vcpu *vcpu)
4882 {
4883         unsigned long exit_qualification;
4884         int dr, dr7, reg;
4885 
4886         exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
4887         dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
4888 
4889         
4890         if (!kvm_require_dr(vcpu, dr))
4891                 return 1;
4892 
4893         
4894         if (!kvm_require_cpl(vcpu, 0))
4895                 return 1;
4896         dr7 = vmcs_readl(GUEST_DR7);
4897         if (dr7 & DR7_GD) {
4898                 
4899 
4900 
4901 
4902 
4903                 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
4904                         vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
4905                         vcpu->run->debug.arch.dr7 = dr7;
4906                         vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu);
4907                         vcpu->run->debug.arch.exception = DB_VECTOR;
4908                         vcpu->run->exit_reason = KVM_EXIT_DEBUG;
4909                         return 0;
4910                 } else {
4911                         vcpu->arch.dr6 &= ~DR_TRAP_BITS;
4912                         vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
4913                         kvm_queue_exception(vcpu, DB_VECTOR);
4914                         return 1;
4915                 }
4916         }
4917 
4918         if (vcpu->guest_debug == 0) {
4919                 exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);
4920 
4921                 
4922 
4923 
4924 
4925 
4926                 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
4927                 return 1;
4928         }
4929 
4930         reg = DEBUG_REG_ACCESS_REG(exit_qualification);
4931         if (exit_qualification & TYPE_MOV_FROM_DR) {
4932                 unsigned long val;
4933 
4934                 if (kvm_get_dr(vcpu, dr, &val))
4935                         return 1;
4936                 kvm_register_write(vcpu, reg, val);
4937         } else
4938                 if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg)))
4939                         return 1;
4940 
4941         return kvm_skip_emulated_instruction(vcpu);
4942 }
4943 
4944 static u64 vmx_get_dr6(struct kvm_vcpu *vcpu)
4945 {
4946         return vcpu->arch.dr6;
4947 }
4948 
4949 static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
4950 {
4951 }
4952 
4953 static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
4954 {
4955         get_debugreg(vcpu->arch.db[0], 0);
4956         get_debugreg(vcpu->arch.db[1], 1);
4957         get_debugreg(vcpu->arch.db[2], 2);
4958         get_debugreg(vcpu->arch.db[3], 3);
4959         get_debugreg(vcpu->arch.dr6, 6);
4960         vcpu->arch.dr7 = vmcs_readl(GUEST_DR7);
4961 
4962         vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
4963         exec_controls_setbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);
4964 }
4965 
4966 static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
4967 {
4968         vmcs_writel(GUEST_DR7, val);
4969 }
4970 
4971 static int handle_cpuid(struct kvm_vcpu *vcpu)
4972 {
4973         return kvm_emulate_cpuid(vcpu);
4974 }
4975 
4976 static int handle_rdmsr(struct kvm_vcpu *vcpu)
4977 {
4978         return kvm_emulate_rdmsr(vcpu);
4979 }
4980 
4981 static int handle_wrmsr(struct kvm_vcpu *vcpu)
4982 {
4983         return kvm_emulate_wrmsr(vcpu);
4984 }
4985 
4986 static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
4987 {
4988         kvm_apic_update_ppr(vcpu);
4989         return 1;
4990 }
4991 
4992 static int handle_interrupt_window(struct kvm_vcpu *vcpu)
4993 {
4994         exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING);
4995 
4996         kvm_make_request(KVM_REQ_EVENT, vcpu);
4997 
4998         ++vcpu->stat.irq_window_exits;
4999         return 1;
5000 }
5001 
5002 static int handle_halt(struct kvm_vcpu *vcpu)
5003 {
5004         return kvm_emulate_halt(vcpu);
5005 }
5006 
5007 static int handle_vmcall(struct kvm_vcpu *vcpu)
5008 {
5009         return kvm_emulate_hypercall(vcpu);
5010 }
5011 
5012 static int handle_invd(struct kvm_vcpu *vcpu)
5013 {
5014         return kvm_emulate_instruction(vcpu, 0);
5015 }
5016 
5017 static int handle_invlpg(struct kvm_vcpu *vcpu)
5018 {
5019         unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5020 
5021         kvm_mmu_invlpg(vcpu, exit_qualification);
5022         return kvm_skip_emulated_instruction(vcpu);
5023 }
5024 
5025 static int handle_rdpmc(struct kvm_vcpu *vcpu)
5026 {
5027         int err;
5028 
5029         err = kvm_rdpmc(vcpu);
5030         return kvm_complete_insn_gp(vcpu, err);
5031 }
5032 
5033 static int handle_wbinvd(struct kvm_vcpu *vcpu)
5034 {
5035         return kvm_emulate_wbinvd(vcpu);
5036 }
5037 
5038 static int handle_xsetbv(struct kvm_vcpu *vcpu)
5039 {
5040         u64 new_bv = kvm_read_edx_eax(vcpu);
5041         u32 index = kvm_rcx_read(vcpu);
5042 
5043         if (kvm_set_xcr(vcpu, index, new_bv) == 0)
5044                 return kvm_skip_emulated_instruction(vcpu);
5045         return 1;
5046 }
5047 
5048 static int handle_apic_access(struct kvm_vcpu *vcpu)
5049 {
5050         if (likely(fasteoi)) {
5051                 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5052                 int access_type, offset;
5053 
5054                 access_type = exit_qualification & APIC_ACCESS_TYPE;
5055                 offset = exit_qualification & APIC_ACCESS_OFFSET;
5056                 
5057 
5058 
5059 
5060 
5061                 if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) &&
5062                     (offset == APIC_EOI)) {
5063                         kvm_lapic_set_eoi(vcpu);
5064                         return kvm_skip_emulated_instruction(vcpu);
5065                 }
5066         }
5067         return kvm_emulate_instruction(vcpu, 0);
5068 }
5069 
5070 static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
5071 {
5072         unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5073         int vector = exit_qualification & 0xff;
5074 
5075         
5076         kvm_apic_set_eoi_accelerated(vcpu, vector);
5077         return 1;
5078 }
5079 
5080 static int handle_apic_write(struct kvm_vcpu *vcpu)
5081 {
5082         unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5083         u32 offset = exit_qualification & 0xfff;
5084 
5085         
5086         kvm_apic_write_nodecode(vcpu, offset);
5087         return 1;
5088 }
5089 
5090 static int handle_task_switch(struct kvm_vcpu *vcpu)
5091 {
5092         struct vcpu_vmx *vmx = to_vmx(vcpu);
5093         unsigned long exit_qualification;
5094         bool has_error_code = false;
5095         u32 error_code = 0;
5096         u16 tss_selector;
5097         int reason, type, idt_v, idt_index;
5098 
5099         idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK);
5100         idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK);
5101         type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK);
5102 
5103         exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5104 
5105         reason = (u32)exit_qualification >> 30;
5106         if (reason == TASK_SWITCH_GATE && idt_v) {
5107                 switch (type) {
5108                 case INTR_TYPE_NMI_INTR:
5109                         vcpu->arch.nmi_injected = false;
5110                         vmx_set_nmi_mask(vcpu, true);
5111                         break;
5112                 case INTR_TYPE_EXT_INTR:
5113                 case INTR_TYPE_SOFT_INTR:
5114                         kvm_clear_interrupt_queue(vcpu);
5115                         break;
5116                 case INTR_TYPE_HARD_EXCEPTION:
5117                         if (vmx->idt_vectoring_info &
5118                             VECTORING_INFO_DELIVER_CODE_MASK) {
5119                                 has_error_code = true;
5120                                 error_code =
5121                                         vmcs_read32(IDT_VECTORING_ERROR_CODE);
5122                         }
5123                         
5124                 case INTR_TYPE_SOFT_EXCEPTION:
5125                         kvm_clear_exception_queue(vcpu);
5126                         break;
5127                 default:
5128                         break;
5129                 }
5130         }
5131         tss_selector = exit_qualification;
5132 
5133         if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION &&
5134                        type != INTR_TYPE_EXT_INTR &&
5135                        type != INTR_TYPE_NMI_INTR))
5136                 WARN_ON(!skip_emulated_instruction(vcpu));
5137 
5138         
5139 
5140 
5141 
5142         return kvm_task_switch(vcpu, tss_selector,
5143                                type == INTR_TYPE_SOFT_INTR ? idt_index : -1,
5144                                reason, has_error_code, error_code);
5145 }
5146 
5147 static int handle_ept_violation(struct kvm_vcpu *vcpu)
5148 {
5149         unsigned long exit_qualification;
5150         gpa_t gpa;
5151         u64 error_code;
5152 
5153         exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5154 
5155         
5156 
5157 
5158 
5159 
5160 
5161         if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
5162                         enable_vnmi &&
5163                         (exit_qualification & INTR_INFO_UNBLOCK_NMI))
5164                 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
5165 
5166         gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
5167         trace_kvm_page_fault(gpa, exit_qualification);
5168 
5169         
5170         error_code = (exit_qualification & EPT_VIOLATION_ACC_READ)
5171                      ? PFERR_USER_MASK : 0;
5172         
5173         error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE)
5174                       ? PFERR_WRITE_MASK : 0;
5175         
5176         error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR)
5177                       ? PFERR_FETCH_MASK : 0;
5178         
5179         error_code |= (exit_qualification &
5180                        (EPT_VIOLATION_READABLE | EPT_VIOLATION_WRITABLE |
5181                         EPT_VIOLATION_EXECUTABLE))
5182                       ? PFERR_PRESENT_MASK : 0;
5183 
5184         error_code |= (exit_qualification & 0x100) != 0 ?
5185                PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
5186 
5187         vcpu->arch.exit_qualification = exit_qualification;
5188         return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
5189 }
5190 
5191 static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
5192 {
5193         gpa_t gpa;
5194 
5195         
5196 
5197 
5198 
5199         gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
5200         if (!is_guest_mode(vcpu) &&
5201             !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
5202                 trace_kvm_fast_mmio(gpa);
5203                 return kvm_skip_emulated_instruction(vcpu);
5204         }
5205 
5206         return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
5207 }
5208 
5209 static int handle_nmi_window(struct kvm_vcpu *vcpu)
5210 {
5211         WARN_ON_ONCE(!enable_vnmi);
5212         exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING);
5213         ++vcpu->stat.nmi_window_exits;
5214         kvm_make_request(KVM_REQ_EVENT, vcpu);
5215 
5216         return 1;
5217 }
5218 
5219 static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
5220 {
5221         struct vcpu_vmx *vmx = to_vmx(vcpu);
5222         bool intr_window_requested;
5223         unsigned count = 130;
5224 
5225         
5226 
5227 
5228 
5229 
5230         WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending);
5231 
5232         intr_window_requested = exec_controls_get(vmx) &
5233                                 CPU_BASED_VIRTUAL_INTR_PENDING;
5234 
5235         while (vmx->emulation_required && count-- != 0) {
5236                 if (intr_window_requested && vmx_interrupt_allowed(vcpu))
5237                         return handle_interrupt_window(&vmx->vcpu);
5238 
5239                 if (kvm_test_request(KVM_REQ_EVENT, vcpu))
5240                         return 1;
5241 
5242                 if (!kvm_emulate_instruction(vcpu, 0))
5243                         return 0;
5244 
5245                 if (vmx->emulation_required && !vmx->rmode.vm86_active &&
5246                     vcpu->arch.exception.pending) {
5247                         vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
5248                         vcpu->run->internal.suberror =
5249                                                 KVM_INTERNAL_ERROR_EMULATION;
5250                         vcpu->run->internal.ndata = 0;
5251                         return 0;
5252                 }
5253 
5254                 if (vcpu->arch.halt_request) {
5255                         vcpu->arch.halt_request = 0;
5256                         return kvm_vcpu_halt(vcpu);
5257                 }
5258 
5259                 
5260 
5261 
5262 
5263                 if (signal_pending(current))
5264                         return 1;
5265 
5266                 if (need_resched())
5267                         schedule();
5268         }
5269 
5270         return 1;
5271 }
5272 
5273 static void grow_ple_window(struct kvm_vcpu *vcpu)
5274 {
5275         struct vcpu_vmx *vmx = to_vmx(vcpu);
5276         unsigned int old = vmx->ple_window;
5277 
5278         vmx->ple_window = __grow_ple_window(old, ple_window,
5279                                             ple_window_grow,
5280                                             ple_window_max);
5281 
5282         if (vmx->ple_window != old) {
5283                 vmx->ple_window_dirty = true;
5284                 trace_kvm_ple_window_update(vcpu->vcpu_id,
5285                                             vmx->ple_window, old);
5286         }
5287 }
5288 
5289 static void shrink_ple_window(struct kvm_vcpu *vcpu)
5290 {
5291         struct vcpu_vmx *vmx = to_vmx(vcpu);
5292         unsigned int old = vmx->ple_window;
5293 
5294         vmx->ple_window = __shrink_ple_window(old, ple_window,
5295                                               ple_window_shrink,
5296                                               ple_window);
5297 
5298         if (vmx->ple_window != old) {
5299                 vmx->ple_window_dirty = true;
5300                 trace_kvm_ple_window_update(vcpu->vcpu_id,
5301                                             vmx->ple_window, old);
5302         }
5303 }
5304 
5305 
5306 
5307 
5308 static void wakeup_handler(void)
5309 {
5310         struct kvm_vcpu *vcpu;
5311         int cpu = smp_processor_id();
5312 
5313         spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
5314         list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
5315                         blocked_vcpu_list) {
5316                 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
5317 
5318                 if (pi_test_on(pi_desc) == 1)
5319                         kvm_vcpu_kick(vcpu);
5320         }
5321         spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
5322 }
5323 
5324 static void vmx_enable_tdp(void)
5325 {
5326         kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
5327                 enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull,
5328                 enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull,
5329                 0ull, VMX_EPT_EXECUTABLE_MASK,
5330                 cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK,
5331                 VMX_EPT_RWX_MASK, 0ull);
5332 
5333         ept_set_mmio_spte_mask();
5334         kvm_enable_tdp();
5335 }
5336 
5337 
5338 
5339 
5340 
5341 static int handle_pause(struct kvm_vcpu *vcpu)
5342 {
5343         if (!kvm_pause_in_guest(vcpu->kvm))
5344                 grow_ple_window(vcpu);
5345 
5346         
5347 
5348 
5349 
5350 
5351 
5352         kvm_vcpu_on_spin(vcpu, true);
5353         return kvm_skip_emulated_instruction(vcpu);
5354 }
5355 
5356 static int handle_nop(struct kvm_vcpu *vcpu)
5357 {
5358         return kvm_skip_emulated_instruction(vcpu);
5359 }
5360 
5361 static int handle_mwait(struct kvm_vcpu *vcpu)
5362 {
5363         printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
5364         return handle_nop(vcpu);
5365 }
5366 
5367 static int handle_invalid_op(struct kvm_vcpu *vcpu)
5368 {
5369         kvm_queue_exception(vcpu, UD_VECTOR);
5370         return 1;
5371 }
5372 
5373 static int handle_monitor_trap(struct kvm_vcpu *vcpu)
5374 {
5375         return 1;
5376 }
5377 
5378 static int handle_monitor(struct kvm_vcpu *vcpu)
5379 {
5380         printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
5381         return handle_nop(vcpu);
5382 }
5383 
5384 static int handle_invpcid(struct kvm_vcpu *vcpu)
5385 {
5386         u32 vmx_instruction_info;
5387         unsigned long type;
5388         bool pcid_enabled;
5389         gva_t gva;
5390         struct x86_exception e;
5391         unsigned i;
5392         unsigned long roots_to_free = 0;
5393         struct {
5394                 u64 pcid;
5395                 u64 gla;
5396         } operand;
5397 
5398         if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
5399                 kvm_queue_exception(vcpu, UD_VECTOR);
5400                 return 1;
5401         }
5402 
5403         vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
5404         type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
5405 
5406         if (type > 3) {
5407                 kvm_inject_gp(vcpu, 0);
5408                 return 1;
5409         }
5410 
5411         
5412 
5413 
5414         if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
5415                                 vmx_instruction_info, false,
5416                                 sizeof(operand), &gva))
5417                 return 1;
5418 
5419         if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
5420                 kvm_inject_page_fault(vcpu, &e);
5421                 return 1;
5422         }
5423 
5424         if (operand.pcid >> 12 != 0) {
5425                 kvm_inject_gp(vcpu, 0);
5426                 return 1;
5427         }
5428 
5429         pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
5430 
5431         switch (type) {
5432         case INVPCID_TYPE_INDIV_ADDR:
5433                 if ((!pcid_enabled && (operand.pcid != 0)) ||
5434                     is_noncanonical_address(operand.gla, vcpu)) {
5435                         kvm_inject_gp(vcpu, 0);
5436                         return 1;
5437                 }
5438                 kvm_mmu_invpcid_gva(vcpu, operand.gla, operand.pcid);
5439                 return kvm_skip_emulated_instruction(vcpu);
5440 
5441         case INVPCID_TYPE_SINGLE_CTXT:
5442                 if (!pcid_enabled && (operand.pcid != 0)) {
5443                         kvm_inject_gp(vcpu, 0);
5444                         return 1;
5445                 }
5446 
5447                 if (kvm_get_active_pcid(vcpu) == operand.pcid) {
5448                         kvm_mmu_sync_roots(vcpu);
5449                         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
5450                 }
5451 
5452                 for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
5453                         if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].cr3)
5454                             == operand.pcid)
5455                                 roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
5456 
5457                 kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free);
5458                 
5459 
5460 
5461 
5462 
5463 
5464                 return kvm_skip_emulated_instruction(vcpu);
5465 
5466         case INVPCID_TYPE_ALL_NON_GLOBAL:
5467                 
5468 
5469 
5470 
5471 
5472 
5473 
5474                 
5475         case INVPCID_TYPE_ALL_INCL_GLOBAL:
5476                 kvm_mmu_unload(vcpu);
5477                 return kvm_skip_emulated_instruction(vcpu);
5478 
5479         default:
5480                 BUG(); 
5481         }
5482 }
5483 
5484 static int handle_pml_full(struct kvm_vcpu *vcpu)
5485 {
5486         unsigned long exit_qualification;
5487 
5488         trace_kvm_pml_full(vcpu->vcpu_id);
5489 
5490         exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5491 
5492         
5493 
5494 
5495 
5496         if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
5497                         enable_vnmi &&
5498                         (exit_qualification & INTR_INFO_UNBLOCK_NMI))
5499                 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
5500                                 GUEST_INTR_STATE_NMI);
5501 
5502         
5503 
5504 
5505 
5506         return 1;
5507 }
5508 
5509 static int handle_preemption_timer(struct kvm_vcpu *vcpu)
5510 {
5511         struct vcpu_vmx *vmx = to_vmx(vcpu);
5512 
5513         if (!vmx->req_immediate_exit &&
5514             !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled))
5515                 kvm_lapic_expired_hv_timer(vcpu);
5516 
5517         return 1;
5518 }
5519 
5520 
5521 
5522 
5523 
5524 static int handle_vmx_instruction(struct kvm_vcpu *vcpu)
5525 {
5526         kvm_queue_exception(vcpu, UD_VECTOR);
5527         return 1;
5528 }
5529 
5530 static int handle_encls(struct kvm_vcpu *vcpu)
5531 {
5532         
5533 
5534 
5535 
5536 
5537         kvm_queue_exception(vcpu, UD_VECTOR);
5538         return 1;
5539 }
5540 
5541 
5542 
5543 
5544 
5545 
5546 static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
5547         [EXIT_REASON_EXCEPTION_NMI]           = handle_exception_nmi,
5548         [EXIT_REASON_EXTERNAL_INTERRUPT]      = handle_external_interrupt,
5549         [EXIT_REASON_TRIPLE_FAULT]            = handle_triple_fault,
5550         [EXIT_REASON_NMI_WINDOW]              = handle_nmi_window,
5551         [EXIT_REASON_IO_INSTRUCTION]          = handle_io,
5552         [EXIT_REASON_CR_ACCESS]               = handle_cr,
5553         [EXIT_REASON_DR_ACCESS]               = handle_dr,
5554         [EXIT_REASON_CPUID]                   = handle_cpuid,
5555         [EXIT_REASON_MSR_READ]                = handle_rdmsr,
5556         [EXIT_REASON_MSR_WRITE]               = handle_wrmsr,
5557         [EXIT_REASON_PENDING_INTERRUPT]       = handle_interrupt_window,
5558         [EXIT_REASON_HLT]                     = handle_halt,
5559         [EXIT_REASON_INVD]                    = handle_invd,
5560         [EXIT_REASON_INVLPG]                  = handle_invlpg,
5561         [EXIT_REASON_RDPMC]                   = handle_rdpmc,
5562         [EXIT_REASON_VMCALL]                  = handle_vmcall,
5563         [EXIT_REASON_VMCLEAR]                 = handle_vmx_instruction,
5564         [EXIT_REASON_VMLAUNCH]                = handle_vmx_instruction,
5565         [EXIT_REASON_VMPTRLD]                 = handle_vmx_instruction,
5566         [EXIT_REASON_VMPTRST]                 = handle_vmx_instruction,
5567         [EXIT_REASON_VMREAD]                  = handle_vmx_instruction,
5568         [EXIT_REASON_VMRESUME]                = handle_vmx_instruction,
5569         [EXIT_REASON_VMWRITE]                 = handle_vmx_instruction,
5570         [EXIT_REASON_VMOFF]                   = handle_vmx_instruction,
5571         [EXIT_REASON_VMON]                    = handle_vmx_instruction,
5572         [EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
5573         [EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
5574         [EXIT_REASON_APIC_WRITE]              = handle_apic_write,
5575         [EXIT_REASON_EOI_INDUCED]             = handle_apic_eoi_induced,
5576         [EXIT_REASON_WBINVD]                  = handle_wbinvd,
5577         [EXIT_REASON_XSETBV]                  = handle_xsetbv,
5578         [EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
5579         [EXIT_REASON_MCE_DURING_VMENTRY]      = handle_machine_check,
5580         [EXIT_REASON_GDTR_IDTR]               = handle_desc,
5581         [EXIT_REASON_LDTR_TR]                 = handle_desc,
5582         [EXIT_REASON_EPT_VIOLATION]           = handle_ept_violation,
5583         [EXIT_REASON_EPT_MISCONFIG]           = handle_ept_misconfig,
5584         [EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
5585         [EXIT_REASON_MWAIT_INSTRUCTION]       = handle_mwait,
5586         [EXIT_REASON_MONITOR_TRAP_FLAG]       = handle_monitor_trap,
5587         [EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
5588         [EXIT_REASON_INVEPT]                  = handle_vmx_instruction,
5589         [EXIT_REASON_INVVPID]                 = handle_vmx_instruction,
5590         [EXIT_REASON_RDRAND]                  = handle_invalid_op,
5591         [EXIT_REASON_RDSEED]                  = handle_invalid_op,
5592         [EXIT_REASON_PML_FULL]                = handle_pml_full,
5593         [EXIT_REASON_INVPCID]                 = handle_invpcid,
5594         [EXIT_REASON_VMFUNC]                  = handle_vmx_instruction,
5595         [EXIT_REASON_PREEMPTION_TIMER]        = handle_preemption_timer,
5596         [EXIT_REASON_ENCLS]                   = handle_encls,
5597 };
5598 
5599 static const int kvm_vmx_max_exit_handlers =
5600         ARRAY_SIZE(kvm_vmx_exit_handlers);
5601 
5602 static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
5603 {
5604         *info1 = vmcs_readl(EXIT_QUALIFICATION);
5605         *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
5606 }
5607 
5608 static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
5609 {
5610         if (vmx->pml_pg) {
5611                 __free_page(vmx->pml_pg);
5612                 vmx->pml_pg = NULL;
5613         }
5614 }
5615 
5616 static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
5617 {
5618         struct vcpu_vmx *vmx = to_vmx(vcpu);
5619         u64 *pml_buf;
5620         u16 pml_idx;
5621 
5622         pml_idx = vmcs_read16(GUEST_PML_INDEX);
5623 
5624         
5625         if (pml_idx == (PML_ENTITY_NUM - 1))
5626                 return;
5627 
5628         
5629         if (pml_idx >= PML_ENTITY_NUM)
5630                 pml_idx = 0;
5631         else
5632                 pml_idx++;
5633 
5634         pml_buf = page_address(vmx->pml_pg);
5635         for (; pml_idx < PML_ENTITY_NUM; pml_idx++) {
5636                 u64 gpa;
5637 
5638                 gpa = pml_buf[pml_idx];
5639                 WARN_ON(gpa & (PAGE_SIZE - 1));
5640                 kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
5641         }
5642 
5643         
5644         vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
5645 }
5646 
5647 
5648 
5649 
5650 
5651 static void kvm_flush_pml_buffers(struct kvm *kvm)
5652 {
5653         int i;
5654         struct kvm_vcpu *vcpu;
5655         
5656 
5657 
5658 
5659 
5660 
5661         kvm_for_each_vcpu(i, vcpu, kvm)
5662                 kvm_vcpu_kick(vcpu);
5663 }
5664 
5665 static void vmx_dump_sel(char *name, uint32_t sel)
5666 {
5667         pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n",
5668                name, vmcs_read16(sel),
5669                vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR),
5670                vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR),
5671                vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR));
5672 }
5673 
5674 static void vmx_dump_dtsel(char *name, uint32_t limit)
5675 {
5676         pr_err("%s                           limit=0x%08x, base=0x%016lx\n",
5677                name, vmcs_read32(limit),
5678                vmcs_readl(limit + GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
5679 }
5680 
5681 void dump_vmcs(void)
5682 {
5683         u32 vmentry_ctl, vmexit_ctl;
5684         u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control;
5685         unsigned long cr4;
5686         u64 efer;
5687         int i, n;
5688 
5689         if (!dump_invalid_vmcs) {
5690                 pr_warn_ratelimited("set kvm_intel.dump_invalid_vmcs=1 to dump internal KVM state.\n");
5691                 return;
5692         }
5693 
5694         vmentry_ctl = vmcs_read32(VM_ENTRY_CONTROLS);
5695         vmexit_ctl = vmcs_read32(VM_EXIT_CONTROLS);
5696         cpu_based_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
5697         pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
5698         cr4 = vmcs_readl(GUEST_CR4);
5699         efer = vmcs_read64(GUEST_IA32_EFER);
5700         secondary_exec_control = 0;
5701         if (cpu_has_secondary_exec_ctrls())
5702                 secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
5703 
5704         pr_err("*** Guest State ***\n");
5705         pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
5706                vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW),
5707                vmcs_readl(CR0_GUEST_HOST_MASK));
5708         pr_err("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
5709                cr4, vmcs_readl(CR4_READ_SHADOW), vmcs_readl(CR4_GUEST_HOST_MASK));
5710         pr_err("CR3 = 0x%016lx\n", vmcs_readl(GUEST_CR3));
5711         if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) &&
5712             (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA))
5713         {
5714                 pr_err("PDPTR0 = 0x%016llx  PDPTR1 = 0x%016llx\n",
5715                        vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1));
5716                 pr_err("PDPTR2 = 0x%016llx  PDPTR3 = 0x%016llx\n",
5717                        vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3));
5718         }
5719         pr_err("RSP = 0x%016lx  RIP = 0x%016lx\n",
5720                vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP));
5721         pr_err("RFLAGS=0x%08lx         DR7 = 0x%016lx\n",
5722                vmcs_readl(GUEST_RFLAGS), vmcs_readl(GUEST_DR7));
5723         pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
5724                vmcs_readl(GUEST_SYSENTER_ESP),
5725                vmcs_read32(GUEST_SYSENTER_CS), vmcs_readl(GUEST_SYSENTER_EIP));
5726         vmx_dump_sel("CS:  ", GUEST_CS_SELECTOR);
5727         vmx_dump_sel("DS:  ", GUEST_DS_SELECTOR);
5728         vmx_dump_sel("SS:  ", GUEST_SS_SELECTOR);
5729         vmx_dump_sel("ES:  ", GUEST_ES_SELECTOR);
5730         vmx_dump_sel("FS:  ", GUEST_FS_SELECTOR);
5731         vmx_dump_sel("GS:  ", GUEST_GS_SELECTOR);
5732         vmx_dump_dtsel("GDTR:", GUEST_GDTR_LIMIT);
5733         vmx_dump_sel("LDTR:", GUEST_LDTR_SELECTOR);
5734         vmx_dump_dtsel("IDTR:", GUEST_IDTR_LIMIT);
5735         vmx_dump_sel("TR:  ", GUEST_TR_SELECTOR);
5736         if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) ||
5737             (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER)))
5738                 pr_err("EFER =     0x%016llx  PAT = 0x%016llx\n",
5739                        efer, vmcs_read64(GUEST_IA32_PAT));
5740         pr_err("DebugCtl = 0x%016llx  DebugExceptions = 0x%016lx\n",
5741                vmcs_read64(GUEST_IA32_DEBUGCTL),
5742                vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS));
5743         if (cpu_has_load_perf_global_ctrl() &&
5744             vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
5745                 pr_err("PerfGlobCtl = 0x%016llx\n",
5746                        vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL));
5747         if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
5748                 pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS));
5749         pr_err("Interruptibility = %08x  ActivityState = %08x\n",
5750                vmcs_read32(GUEST_INTERRUPTIBILITY_INFO),
5751                vmcs_read32(GUEST_ACTIVITY_STATE));
5752         if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
5753                 pr_err("InterruptStatus = %04x\n",
5754                        vmcs_read16(GUEST_INTR_STATUS));
5755 
5756         pr_err("*** Host State ***\n");
5757         pr_err("RIP = 0x%016lx  RSP = 0x%016lx\n",
5758                vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
5759         pr_err("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n",
5760                vmcs_read16(HOST_CS_SELECTOR), vmcs_read16(HOST_SS_SELECTOR),
5761                vmcs_read16(HOST_DS_SELECTOR), vmcs_read16(HOST_ES_SELECTOR),
5762                vmcs_read16(HOST_FS_SELECTOR), vmcs_read16(HOST_GS_SELECTOR),
5763                vmcs_read16(HOST_TR_SELECTOR));
5764         pr_err("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n",
5765                vmcs_readl(HOST_FS_BASE), vmcs_readl(HOST_GS_BASE),
5766                vmcs_readl(HOST_TR_BASE));
5767         pr_err("GDTBase=%016lx IDTBase=%016lx\n",
5768                vmcs_readl(HOST_GDTR_BASE), vmcs_readl(HOST_IDTR_BASE));
5769         pr_err("CR0=%016lx CR3=%016lx CR4=%016lx\n",
5770                vmcs_readl(HOST_CR0), vmcs_readl(HOST_CR3),
5771                vmcs_readl(HOST_CR4));
5772         pr_err("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
5773                vmcs_readl(HOST_IA32_SYSENTER_ESP),
5774                vmcs_read32(HOST_IA32_SYSENTER_CS),
5775                vmcs_readl(HOST_IA32_SYSENTER_EIP));
5776         if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER))
5777                 pr_err("EFER = 0x%016llx  PAT = 0x%016llx\n",
5778                        vmcs_read64(HOST_IA32_EFER),
5779                        vmcs_read64(HOST_IA32_PAT));
5780         if (cpu_has_load_perf_global_ctrl() &&
5781             vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
5782                 pr_err("PerfGlobCtl = 0x%016llx\n",
5783                        vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL));
5784 
5785         pr_err("*** Control State ***\n");
5786         pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
5787                pin_based_exec_ctrl, cpu_based_exec_ctrl, secondary_exec_control);
5788         pr_err("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl);
5789         pr_err("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n",
5790                vmcs_read32(EXCEPTION_BITMAP),
5791                vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK),
5792                vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH));
5793         pr_err("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n",
5794                vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
5795                vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE),
5796                vmcs_read32(VM_ENTRY_INSTRUCTION_LEN));
5797         pr_err("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n",
5798                vmcs_read32(VM_EXIT_INTR_INFO),
5799                vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
5800                vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
5801         pr_err("        reason=%08x qualification=%016lx\n",
5802                vmcs_read32(VM_EXIT_REASON), vmcs_readl(EXIT_QUALIFICATION));
5803         pr_err("IDTVectoring: info=%08x errcode=%08x\n",
5804                vmcs_read32(IDT_VECTORING_INFO_FIELD),
5805                vmcs_read32(IDT_VECTORING_ERROR_CODE));
5806         pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET));
5807         if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING)
5808                 pr_err("TSC Multiplier = 0x%016llx\n",
5809                        vmcs_read64(TSC_MULTIPLIER));
5810         if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) {
5811                 if (secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
5812                         u16 status = vmcs_read16(GUEST_INTR_STATUS);
5813                         pr_err("SVI|RVI = %02x|%02x ", status >> 8, status & 0xff);
5814                 }
5815                 pr_cont("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD));
5816                 if (secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
5817                         pr_err("APIC-access addr = 0x%016llx ", vmcs_read64(APIC_ACCESS_ADDR));
5818                 pr_cont("virt-APIC addr = 0x%016llx\n", vmcs_read64(VIRTUAL_APIC_PAGE_ADDR));
5819         }
5820         if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR)
5821                 pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV));
5822         if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT))
5823                 pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER));
5824         n = vmcs_read32(CR3_TARGET_COUNT);
5825         for (i = 0; i + 1 < n; i += 4)
5826                 pr_err("CR3 target%u=%016lx target%u=%016lx\n",
5827                        i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2),
5828                        i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2));
5829         if (i < n)
5830                 pr_err("CR3 target%u=%016lx\n",
5831                        i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2));
5832         if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING)
5833                 pr_err("PLE Gap=%08x Window=%08x\n",
5834                        vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW));
5835         if (secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
5836                 pr_err("Virtual processor ID = 0x%04x\n",
5837                        vmcs_read16(VIRTUAL_PROCESSOR_ID));
5838 }
5839 
5840 
5841 
5842 
5843 
5844 static int vmx_handle_exit(struct kvm_vcpu *vcpu)
5845 {
5846         struct vcpu_vmx *vmx = to_vmx(vcpu);
5847         u32 exit_reason = vmx->exit_reason;
5848         u32 vectoring_info = vmx->idt_vectoring_info;
5849 
5850         trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
5851 
5852         
5853 
5854 
5855 
5856 
5857 
5858 
5859         if (enable_pml)
5860                 vmx_flush_pml_buffer(vcpu);
5861 
5862         
5863         if (vmx->emulation_required)
5864                 return handle_invalid_guest_state(vcpu);
5865 
5866         if (is_guest_mode(vcpu) && nested_vmx_exit_reflected(vcpu, exit_reason))
5867                 return nested_vmx_reflect_vmexit(vcpu, exit_reason);
5868 
5869         if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
5870                 dump_vmcs();
5871                 vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
5872                 vcpu->run->fail_entry.hardware_entry_failure_reason
5873                         = exit_reason;
5874                 return 0;
5875         }
5876 
5877         if (unlikely(vmx->fail)) {
5878                 dump_vmcs();
5879                 vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
5880                 vcpu->run->fail_entry.hardware_entry_failure_reason
5881                         = vmcs_read32(VM_INSTRUCTION_ERROR);
5882                 return 0;
5883         }
5884 
5885         
5886 
5887 
5888 
5889 
5890 
5891 
5892         if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
5893                         (exit_reason != EXIT_REASON_EXCEPTION_NMI &&
5894                         exit_reason != EXIT_REASON_EPT_VIOLATION &&
5895                         exit_reason != EXIT_REASON_PML_FULL &&
5896                         exit_reason != EXIT_REASON_TASK_SWITCH)) {
5897                 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
5898                 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
5899                 vcpu->run->internal.ndata = 3;
5900                 vcpu->run->internal.data[0] = vectoring_info;
5901                 vcpu->run->internal.data[1] = exit_reason;
5902                 vcpu->run->internal.data[2] = vcpu->arch.exit_qualification;
5903                 if (exit_reason == EXIT_REASON_EPT_MISCONFIG) {
5904                         vcpu->run->internal.ndata++;
5905                         vcpu->run->internal.data[3] =
5906                                 vmcs_read64(GUEST_PHYSICAL_ADDRESS);
5907                 }
5908                 return 0;
5909         }
5910 
5911         if (unlikely(!enable_vnmi &&
5912                      vmx->loaded_vmcs->soft_vnmi_blocked)) {
5913                 if (vmx_interrupt_allowed(vcpu)) {
5914                         vmx->loaded_vmcs->soft_vnmi_blocked = 0;
5915                 } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
5916                            vcpu->arch.nmi_pending) {
5917                         
5918 
5919 
5920 
5921 
5922 
5923                         printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
5924                                "state on VCPU %d after 1 s timeout\n",
5925                                __func__, vcpu->vcpu_id);
5926                         vmx->loaded_vmcs->soft_vnmi_blocked = 0;
5927                 }
5928         }
5929 
5930         if (exit_reason < kvm_vmx_max_exit_handlers
5931             && kvm_vmx_exit_handlers[exit_reason])
5932                 return kvm_vmx_exit_handlers[exit_reason](vcpu);
5933         else {
5934                 vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
5935                                 exit_reason);
5936                 dump_vmcs();
5937                 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
5938                 vcpu->run->internal.suberror =
5939                         KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
5940                 vcpu->run->internal.ndata = 1;
5941                 vcpu->run->internal.data[0] = exit_reason;
5942                 return 0;
5943         }
5944 }
5945 
5946 
5947 
5948 
5949 
5950 
5951 
5952 
5953 
5954 
5955 
5956 static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
5957 {
5958         int size = PAGE_SIZE << L1D_CACHE_ORDER;
5959 
5960         
5961 
5962 
5963 
5964         if (static_branch_likely(&vmx_l1d_flush_cond)) {
5965                 bool flush_l1d;
5966 
5967                 
5968 
5969 
5970 
5971 
5972                 flush_l1d = vcpu->arch.l1tf_flush_l1d;
5973                 vcpu->arch.l1tf_flush_l1d = false;
5974 
5975                 
5976 
5977 
5978 
5979                 flush_l1d |= kvm_get_cpu_l1tf_flush_l1d();
5980                 kvm_clear_cpu_l1tf_flush_l1d();
5981 
5982                 if (!flush_l1d)
5983                         return;
5984         }
5985 
5986         vcpu->stat.l1d_flush++;
5987 
5988         if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
5989                 wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
5990                 return;
5991         }
5992 
5993         asm volatile(
5994                 
5995                 "xorl   %%eax, %%eax\n"
5996                 ".Lpopulate_tlb:\n\t"
5997                 "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
5998                 "addl   $4096, %%eax\n\t"
5999                 "cmpl   %%eax, %[size]\n\t"
6000                 "jne    .Lpopulate_tlb\n\t"
6001                 "xorl   %%eax, %%eax\n\t"
6002                 "cpuid\n\t"
6003                 
6004                 "xorl   %%eax, %%eax\n"
6005                 ".Lfill_cache:\n"
6006                 "movzbl (%[flush_pages], %%" _ASM_AX "), %%ecx\n\t"
6007                 "addl   $64, %%eax\n\t"
6008                 "cmpl   %%eax, %[size]\n\t"
6009                 "jne    .Lfill_cache\n\t"
6010                 "lfence\n"
6011                 :: [flush_pages] "r" (vmx_l1d_flush_pages),
6012                     [size] "r" (size)
6013                 : "eax", "ebx", "ecx", "edx");
6014 }
6015 
6016 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
6017 {
6018         struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
6019 
6020         if (is_guest_mode(vcpu) &&
6021                 nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
6022                 return;
6023 
6024         if (irr == -1 || tpr < irr) {
6025                 vmcs_write32(TPR_THRESHOLD, 0);
6026                 return;
6027         }
6028 
6029         vmcs_write32(TPR_THRESHOLD, irr);
6030 }
6031 
6032 void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
6033 {
6034         struct vcpu_vmx *vmx = to_vmx(vcpu);
6035         u32 sec_exec_control;
6036 
6037         if (!lapic_in_kernel(vcpu))
6038                 return;
6039 
6040         if (!flexpriority_enabled &&
6041             !cpu_has_vmx_virtualize_x2apic_mode())
6042                 return;
6043 
6044         
6045         if (is_guest_mode(vcpu)) {
6046                 vmx->nested.change_vmcs01_virtual_apic_mode = true;
6047                 return;
6048         }
6049 
6050         sec_exec_control = secondary_exec_controls_get(vmx);
6051         sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
6052                               SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
6053 
6054         switch (kvm_get_apic_mode(vcpu)) {
6055         case LAPIC_MODE_INVALID:
6056                 WARN_ONCE(true, "Invalid local APIC state");
6057         case LAPIC_MODE_DISABLED:
6058                 break;
6059         case LAPIC_MODE_XAPIC:
6060                 if (flexpriority_enabled) {
6061                         sec_exec_control |=
6062                                 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
6063                         vmx_flush_tlb(vcpu, true);
6064                 }
6065                 break;
6066         case LAPIC_MODE_X2APIC:
6067                 if (cpu_has_vmx_virtualize_x2apic_mode())
6068                         sec_exec_control |=
6069                                 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
6070                 break;
6071         }
6072         secondary_exec_controls_set(vmx, sec_exec_control);
6073 
6074         vmx_update_msr_bitmap(vcpu);
6075 }
6076 
6077 static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
6078 {
6079         if (!is_guest_mode(vcpu)) {
6080                 vmcs_write64(APIC_ACCESS_ADDR, hpa);
6081                 vmx_flush_tlb(vcpu, true);
6082         }
6083 }
6084 
6085 static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
6086 {
6087         u16 status;
6088         u8 old;
6089 
6090         if (max_isr == -1)
6091                 max_isr = 0;
6092 
6093         status = vmcs_read16(GUEST_INTR_STATUS);
6094         old = status >> 8;
6095         if (max_isr != old) {
6096                 status &= 0xff;
6097                 status |= max_isr << 8;
6098                 vmcs_write16(GUEST_INTR_STATUS, status);
6099         }
6100 }
6101 
6102 static void vmx_set_rvi(int vector)
6103 {
6104         u16 status;
6105         u8 old;
6106 
6107         if (vector == -1)
6108                 vector = 0;
6109 
6110         status = vmcs_read16(GUEST_INTR_STATUS);
6111         old = (u8)status & 0xff;
6112         if ((u8)vector != old) {
6113                 status &= ~0xff;
6114                 status |= (u8)vector;
6115                 vmcs_write16(GUEST_INTR_STATUS, status);
6116         }
6117 }
6118 
6119 static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
6120 {
6121         
6122 
6123 
6124 
6125 
6126 
6127 
6128 
6129         if (!is_guest_mode(vcpu))
6130                 vmx_set_rvi(max_irr);
6131 }
6132 
6133 static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
6134 {
6135         struct vcpu_vmx *vmx = to_vmx(vcpu);
6136         int max_irr;
6137         bool max_irr_updated;
6138 
6139         WARN_ON(!vcpu->arch.apicv_active);
6140         if (pi_test_on(&vmx->pi_desc)) {
6141                 pi_clear_on(&vmx->pi_desc);
6142                 
6143 
6144 
6145 
6146                 smp_mb__after_atomic();
6147                 max_irr_updated =
6148                         kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr);
6149 
6150                 
6151 
6152 
6153 
6154 
6155 
6156 
6157 
6158                 if (is_guest_mode(vcpu) && max_irr_updated) {
6159                         if (nested_exit_on_intr(vcpu))
6160                                 kvm_vcpu_exiting_guest_mode(vcpu);
6161                         else
6162                                 kvm_make_request(KVM_REQ_EVENT, vcpu);
6163                 }
6164         } else {
6165                 max_irr = kvm_lapic_find_highest_irr(vcpu);
6166         }
6167         vmx_hwapic_irr_update(vcpu, max_irr);
6168         return max_irr;
6169 }
6170 
6171 static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
6172 {
6173         struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
6174 
6175         return pi_test_on(pi_desc) ||
6176                 (pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc));
6177 }
6178 
6179 static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
6180 {
6181         if (!kvm_vcpu_apicv_active(vcpu))
6182                 return;
6183 
6184         vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]);
6185         vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]);
6186         vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]);
6187         vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]);
6188 }
6189 
6190 static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
6191 {
6192         struct vcpu_vmx *vmx = to_vmx(vcpu);
6193 
6194         pi_clear_on(&vmx->pi_desc);
6195         memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
6196 }
6197 
6198 static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
6199 {
6200         vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
6201 
6202         
6203         if (is_page_fault(vmx->exit_intr_info))
6204                 vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
6205 
6206         
6207         if (is_machine_check(vmx->exit_intr_info))
6208                 kvm_machine_check();
6209 
6210         
6211         if (is_nmi(vmx->exit_intr_info)) {
6212                 kvm_before_interrupt(&vmx->vcpu);
6213                 asm("int $2");
6214                 kvm_after_interrupt(&vmx->vcpu);
6215         }
6216 }
6217 
6218 static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
6219 {
6220         unsigned int vector;
6221         unsigned long entry;
6222 #ifdef CONFIG_X86_64
6223         unsigned long tmp;
6224 #endif
6225         gate_desc *desc;
6226         u32 intr_info;
6227 
6228         intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
6229         if (WARN_ONCE(!is_external_intr(intr_info),
6230             "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
6231                 return;
6232 
6233         vector = intr_info & INTR_INFO_VECTOR_MASK;
6234         desc = (gate_desc *)host_idt_base + vector;
6235         entry = gate_offset(desc);
6236 
6237         kvm_before_interrupt(vcpu);
6238 
6239         asm volatile(
6240 #ifdef CONFIG_X86_64
6241                 "mov %%" _ASM_SP ", %[sp]\n\t"
6242                 "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
6243                 "push $%c[ss]\n\t"
6244                 "push %[sp]\n\t"
6245 #endif
6246                 "pushf\n\t"
6247                 __ASM_SIZE(push) " $%c[cs]\n\t"
6248                 CALL_NOSPEC
6249                 :
6250 #ifdef CONFIG_X86_64
6251                 [sp]"=&r"(tmp),
6252 #endif
6253                 ASM_CALL_CONSTRAINT
6254                 :
6255                 THUNK_TARGET(entry),
6256                 [ss]"i"(__KERNEL_DS),
6257                 [cs]"i"(__KERNEL_CS)
6258         );
6259 
6260         kvm_after_interrupt(vcpu);
6261 }
6262 STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
6263 
6264 static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
6265 {
6266         struct vcpu_vmx *vmx = to_vmx(vcpu);
6267 
6268         if (vmx->exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
6269                 handle_external_interrupt_irqoff(vcpu);
6270         else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
6271                 handle_exception_nmi_irqoff(vmx);
6272 }
6273 
6274 static bool vmx_has_emulated_msr(int index)
6275 {
6276         switch (index) {
6277         case MSR_IA32_SMBASE:
6278                 
6279 
6280 
6281 
6282                 return enable_unrestricted_guest || emulate_invalid_guest_state;
6283         case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
6284                 return nested;
6285         case MSR_AMD64_VIRT_SPEC_CTRL:
6286                 
6287                 return false;
6288         default:
6289                 return true;
6290         }
6291 }
6292 
6293 static bool vmx_pt_supported(void)
6294 {
6295         return pt_mode == PT_MODE_HOST_GUEST;
6296 }
6297 
6298 static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
6299 {
6300         u32 exit_intr_info;
6301         bool unblock_nmi;
6302         u8 vector;
6303         bool idtv_info_valid;
6304 
6305         idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
6306 
6307         if (enable_vnmi) {
6308                 if (vmx->loaded_vmcs->nmi_known_unmasked)
6309                         return;
6310                 
6311 
6312 
6313 
6314                 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
6315                 unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
6316                 vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
6317                 
6318 
6319 
6320 
6321 
6322 
6323 
6324 
6325 
6326 
6327                 if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
6328                     vector != DF_VECTOR && !idtv_info_valid)
6329                         vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
6330                                       GUEST_INTR_STATE_NMI);
6331                 else
6332                         vmx->loaded_vmcs->nmi_known_unmasked =
6333                                 !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
6334                                   & GUEST_INTR_STATE_NMI);
6335         } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
6336                 vmx->loaded_vmcs->vnmi_blocked_time +=
6337                         ktime_to_ns(ktime_sub(ktime_get(),
6338                                               vmx->loaded_vmcs->entry_time));
6339 }
6340 
6341 static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
6342                                       u32 idt_vectoring_info,
6343                                       int instr_len_field,
6344                                       int error_code_field)
6345 {
6346         u8 vector;
6347         int type;
6348         bool idtv_info_valid;
6349 
6350         idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
6351 
6352         vcpu->arch.nmi_injected = false;
6353         kvm_clear_exception_queue(vcpu);
6354         kvm_clear_interrupt_queue(vcpu);
6355 
6356         if (!idtv_info_valid)
6357                 return;
6358 
6359         kvm_make_request(KVM_REQ_EVENT, vcpu);
6360 
6361         vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
6362         type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
6363 
6364         switch (type) {
6365         case INTR_TYPE_NMI_INTR:
6366                 vcpu->arch.nmi_injected = true;
6367                 
6368 
6369 
6370 
6371 
6372                 vmx_set_nmi_mask(vcpu, false);
6373                 break;
6374         case INTR_TYPE_SOFT_EXCEPTION:
6375                 vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
6376                 
6377         case INTR_TYPE_HARD_EXCEPTION:
6378                 if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
6379                         u32 err = vmcs_read32(error_code_field);
6380                         kvm_requeue_exception_e(vcpu, vector, err);
6381                 } else
6382                         kvm_requeue_exception(vcpu, vector);
6383                 break;
6384         case INTR_TYPE_SOFT_INTR:
6385                 vcpu->arch.event_exit_inst_len = vmcs_read32(instr_len_field);
6386                 
6387         case INTR_TYPE_EXT_INTR:
6388                 kvm_queue_interrupt(vcpu, vector, type == INTR_TYPE_SOFT_INTR);
6389                 break;
6390         default:
6391                 break;
6392         }
6393 }
6394 
6395 static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
6396 {
6397         __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info,
6398                                   VM_EXIT_INSTRUCTION_LEN,
6399                                   IDT_VECTORING_ERROR_CODE);
6400 }
6401 
6402 static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
6403 {
6404         __vmx_complete_interrupts(vcpu,
6405                                   vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
6406                                   VM_ENTRY_INSTRUCTION_LEN,
6407                                   VM_ENTRY_EXCEPTION_ERROR_CODE);
6408 
6409         vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
6410 }
6411 
6412 static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
6413 {
6414         int i, nr_msrs;
6415         struct perf_guest_switch_msr *msrs;
6416 
6417         msrs = perf_guest_get_msrs(&nr_msrs);
6418 
6419         if (!msrs)
6420                 return;
6421 
6422         for (i = 0; i < nr_msrs; i++)
6423                 if (msrs[i].host == msrs[i].guest)
6424                         clear_atomic_switch_msr(vmx, msrs[i].msr);
6425                 else
6426                         add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest,
6427                                         msrs[i].host, false);
6428 }
6429 
6430 static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx)
6431 {
6432         u32 host_umwait_control;
6433 
6434         if (!vmx_has_waitpkg(vmx))
6435                 return;
6436 
6437         host_umwait_control = get_umwait_control_msr();
6438 
6439         if (vmx->msr_ia32_umwait_control != host_umwait_control)
6440                 add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL,
6441                         vmx->msr_ia32_umwait_control,
6442                         host_umwait_control, false);
6443         else
6444                 clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL);
6445 }
6446 
6447 static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
6448 {
6449         struct vcpu_vmx *vmx = to_vmx(vcpu);
6450         u64 tscl;
6451         u32 delta_tsc;
6452 
6453         if (vmx->req_immediate_exit) {
6454                 vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0);
6455                 vmx->loaded_vmcs->hv_timer_soft_disabled = false;
6456         } else if (vmx->hv_deadline_tsc != -1) {
6457                 tscl = rdtsc();
6458                 if (vmx->hv_deadline_tsc > tscl)
6459                         
6460                         delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >>
6461                                 cpu_preemption_timer_multi);
6462                 else
6463                         delta_tsc = 0;
6464 
6465                 vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc);
6466                 vmx->loaded_vmcs->hv_timer_soft_disabled = false;
6467         } else if (!vmx->loaded_vmcs->hv_timer_soft_disabled) {
6468                 vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, -1);
6469                 vmx->loaded_vmcs->hv_timer_soft_disabled = true;
6470         }
6471 }
6472 
6473 void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
6474 {
6475         if (unlikely(host_rsp != vmx->loaded_vmcs->host_state.rsp)) {
6476                 vmx->loaded_vmcs->host_state.rsp = host_rsp;
6477                 vmcs_writel(HOST_RSP, host_rsp);
6478         }
6479 }
6480 
6481 bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
6482 
6483 static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
6484 {
6485         struct vcpu_vmx *vmx = to_vmx(vcpu);
6486         unsigned long cr3, cr4;
6487 
6488         
6489         if (unlikely(!enable_vnmi &&
6490                      vmx->loaded_vmcs->soft_vnmi_blocked))
6491                 vmx->loaded_vmcs->entry_time = ktime_get();
6492 
6493         
6494 
6495         if (vmx->emulation_required)
6496                 return;
6497 
6498         if (vmx->ple_window_dirty) {
6499                 vmx->ple_window_dirty = false;
6500                 vmcs_write32(PLE_WINDOW, vmx->ple_window);
6501         }
6502 
6503         if (vmx->nested.need_vmcs12_to_shadow_sync)
6504                 nested_sync_vmcs12_to_shadow(vcpu);
6505 
6506         if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
6507                 vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
6508         if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
6509                 vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
6510 
6511         cr3 = __get_current_cr3_fast();
6512         if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
6513                 vmcs_writel(HOST_CR3, cr3);
6514                 vmx->loaded_vmcs->host_state.cr3 = cr3;
6515         }
6516 
6517         cr4 = cr4_read_shadow();
6518         if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
6519                 vmcs_writel(HOST_CR4, cr4);
6520                 vmx->loaded_vmcs->host_state.cr4 = cr4;
6521         }
6522 
6523         
6524 
6525 
6526 
6527 
6528         if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
6529                 vmx_set_interrupt_shadow(vcpu, 0);
6530 
6531         kvm_load_guest_xcr0(vcpu);
6532 
6533         pt_guest_enter(vmx);
6534 
6535         atomic_switch_perf_msrs(vmx);
6536         atomic_switch_umwait_control_msr(vmx);
6537 
6538         if (enable_preemption_timer)
6539                 vmx_update_hv_timer(vcpu);
6540 
6541         if (lapic_in_kernel(vcpu) &&
6542                 vcpu->arch.apic->lapic_timer.timer_advance_ns)
6543                 kvm_wait_lapic_expire(vcpu);
6544 
6545         
6546 
6547 
6548 
6549 
6550 
6551         x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
6552 
6553         
6554         if (static_branch_unlikely(&vmx_l1d_should_flush))
6555                 vmx_l1d_flush(vcpu);
6556         else if (static_branch_unlikely(&mds_user_clear))
6557                 mds_clear_cpu_buffers();
6558 
6559         if (vcpu->arch.cr2 != read_cr2())
6560                 write_cr2(vcpu->arch.cr2);
6561 
6562         vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
6563                                    vmx->loaded_vmcs->launched);
6564 
6565         vcpu->arch.cr2 = read_cr2();
6566 
6567         
6568 
6569 
6570 
6571 
6572 
6573 
6574 
6575 
6576 
6577 
6578 
6579 
6580 
6581 
6582         if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
6583                 vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
6584 
6585         x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
6586 
6587         
6588         if (static_branch_unlikely(&enable_evmcs))
6589                 current_evmcs->hv_clean_fields |=
6590                         HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
6591 
6592         if (static_branch_unlikely(&enable_evmcs))
6593                 current_evmcs->hv_vp_id = vcpu->arch.hyperv.vp_index;
6594 
6595         
6596         if (vmx->host_debugctlmsr)
6597                 update_debugctlmsr(vmx->host_debugctlmsr);
6598 
6599 #ifndef CONFIG_X86_64
6600         
6601 
6602 
6603 
6604 
6605 
6606 
6607 
6608         loadsegment(ds, __USER_DS);
6609         loadsegment(es, __USER_DS);
6610 #endif
6611 
6612         vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
6613                                   | (1 << VCPU_EXREG_RFLAGS)
6614                                   | (1 << VCPU_EXREG_PDPTR)
6615                                   | (1 << VCPU_EXREG_SEGMENTS)
6616                                   | (1 << VCPU_EXREG_CR3));
6617         vcpu->arch.regs_dirty = 0;
6618 
6619         pt_guest_exit(vmx);
6620 
6621         kvm_put_guest_xcr0(vcpu);
6622 
6623         vmx->nested.nested_run_pending = 0;
6624         vmx->idt_vectoring_info = 0;
6625 
6626         vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON);
6627         if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
6628                 kvm_machine_check();
6629 
6630         if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
6631                 return;
6632 
6633         vmx->loaded_vmcs->launched = 1;
6634         vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
6635 
6636         vmx_recover_nmi_blocking(vmx);
6637         vmx_complete_interrupts(vmx);
6638 }
6639 
6640 static struct kvm *vmx_vm_alloc(void)
6641 {
6642         struct kvm_vmx *kvm_vmx = __vmalloc(sizeof(struct kvm_vmx),
6643                                             GFP_KERNEL_ACCOUNT | __GFP_ZERO,
6644                                             PAGE_KERNEL);
6645 
6646         if (!kvm_vmx)
6647                 return NULL;
6648 
6649         return &kvm_vmx->kvm;
6650 }
6651 
6652 static void vmx_vm_free(struct kvm *kvm)
6653 {
6654         kfree(kvm->arch.hyperv.hv_pa_pg);
6655         vfree(to_kvm_vmx(kvm));
6656 }
6657 
6658 static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
6659 {
6660         struct vcpu_vmx *vmx = to_vmx(vcpu);
6661 
6662         if (enable_pml)
6663                 vmx_destroy_pml_buffer(vmx);
6664         free_vpid(vmx->vpid);
6665         nested_vmx_free_vcpu(vcpu);
6666         free_loaded_vmcs(vmx->loaded_vmcs);
6667         kfree(vmx->guest_msrs);
6668         kvm_vcpu_uninit(vcpu);
6669         kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
6670         kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
6671         kmem_cache_free(kvm_vcpu_cache, vmx);
6672 }
6673 
6674 static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
6675 {
6676         int err;
6677         struct vcpu_vmx *vmx;
6678         unsigned long *msr_bitmap;
6679         int cpu;
6680 
6681         BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0,
6682                 "struct kvm_vcpu must be at offset 0 for arch usercopy region");
6683 
6684         vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
6685         if (!vmx)
6686                 return ERR_PTR(-ENOMEM);
6687 
6688         vmx->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
6689                         GFP_KERNEL_ACCOUNT);
6690         if (!vmx->vcpu.arch.user_fpu) {
6691                 printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n");
6692                 err = -ENOMEM;
6693                 goto free_partial_vcpu;
6694         }
6695 
6696         vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
6697                         GFP_KERNEL_ACCOUNT);
6698         if (!vmx->vcpu.arch.guest_fpu) {
6699                 printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
6700                 err = -ENOMEM;
6701                 goto free_user_fpu;
6702         }
6703 
6704         vmx->vpid = allocate_vpid();
6705 
6706         err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
6707         if (err)
6708                 goto free_vcpu;
6709 
6710         err = -ENOMEM;
6711 
6712         
6713 
6714 
6715 
6716 
6717 
6718         if (enable_pml) {
6719                 vmx->pml_pg = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
6720                 if (!vmx->pml_pg)
6721                         goto uninit_vcpu;
6722         }
6723 
6724         vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT);
6725         BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0])
6726                      > PAGE_SIZE);
6727 
6728         if (!vmx->guest_msrs)
6729                 goto free_pml;
6730 
6731         err = alloc_loaded_vmcs(&vmx->vmcs01);
6732         if (err < 0)
6733                 goto free_msrs;
6734 
6735         msr_bitmap = vmx->vmcs01.msr_bitmap;
6736         vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_TSC, MSR_TYPE_R);
6737         vmx_disable_intercept_for_msr(msr_bitmap, MSR_FS_BASE, MSR_TYPE_RW);
6738         vmx_disable_intercept_for_msr(msr_bitmap, MSR_GS_BASE, MSR_TYPE_RW);
6739         vmx_disable_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
6740         vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
6741         vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
6742         vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
6743         if (kvm_cstate_in_guest(kvm)) {
6744                 vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R);
6745                 vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
6746                 vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
6747                 vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
6748         }
6749         vmx->msr_bitmap_mode = 0;
6750 
6751         vmx->loaded_vmcs = &vmx->vmcs01;
6752         cpu = get_cpu();
6753         vmx_vcpu_load(&vmx->vcpu, cpu);
6754         vmx->vcpu.cpu = cpu;
6755         vmx_vcpu_setup(vmx);
6756         vmx_vcpu_put(&vmx->vcpu);
6757         put_cpu();
6758         if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
6759                 err = alloc_apic_access_page(kvm);
6760                 if (err)
6761                         goto free_vmcs;
6762         }
6763 
6764         if (enable_ept && !enable_unrestricted_guest) {
6765                 err = init_rmode_identity_map(kvm);
6766                 if (err)
6767                         goto free_vmcs;
6768         }
6769 
6770         if (nested)
6771                 nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
6772                                            vmx_capability.ept);
6773         else
6774                 memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs));
6775 
6776         vmx->nested.posted_intr_nv = -1;
6777         vmx->nested.current_vmptr = -1ull;
6778 
6779         vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
6780 
6781         
6782 
6783 
6784 
6785         vmx->pi_desc.nv = POSTED_INTR_VECTOR;
6786         vmx->pi_desc.sn = 1;
6787 
6788         vmx->ept_pointer = INVALID_PAGE;
6789 
6790         return &vmx->vcpu;
6791 
6792 free_vmcs:
6793         free_loaded_vmcs(vmx->loaded_vmcs);
6794 free_msrs:
6795         kfree(vmx->guest_msrs);
6796 free_pml:
6797         vmx_destroy_pml_buffer(vmx);
6798 uninit_vcpu:
6799         kvm_vcpu_uninit(&vmx->vcpu);
6800 free_vcpu:
6801         free_vpid(vmx->vpid);
6802         kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
6803 free_user_fpu:
6804         kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
6805 free_partial_vcpu:
6806         kmem_cache_free(kvm_vcpu_cache, vmx);
6807         return ERR_PTR(err);
6808 }
6809 
6810 #define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
6811 #define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
6812 
6813 static int vmx_vm_init(struct kvm *kvm)
6814 {
6815         spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock);
6816 
6817         if (!ple_gap)
6818                 kvm->arch.pause_in_guest = true;
6819 
6820         if (boot_cpu_has(X86_BUG_L1TF) && enable_ept) {
6821                 switch (l1tf_mitigation) {
6822                 case L1TF_MITIGATION_OFF:
6823                 case L1TF_MITIGATION_FLUSH_NOWARN:
6824                         
6825                         break;
6826                 case L1TF_MITIGATION_FLUSH:
6827                 case L1TF_MITIGATION_FLUSH_NOSMT:
6828                 case L1TF_MITIGATION_FULL:
6829                         
6830 
6831 
6832 
6833                         if (sched_smt_active())
6834                                 pr_warn_once(L1TF_MSG_SMT);
6835                         if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER)
6836                                 pr_warn_once(L1TF_MSG_L1D);
6837                         break;
6838                 case L1TF_MITIGATION_FULL_FORCE:
6839                         
6840                         break;
6841                 }
6842         }
6843         return 0;
6844 }
6845 
6846 static int __init vmx_check_processor_compat(void)
6847 {
6848         struct vmcs_config vmcs_conf;
6849         struct vmx_capability vmx_cap;
6850 
6851         if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0)
6852                 return -EIO;
6853         if (nested)
6854                 nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept);
6855         if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
6856                 printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
6857                                 smp_processor_id());
6858                 return -EIO;
6859         }
6860         return 0;
6861 }
6862 
6863 static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
6864 {
6865         u8 cache;
6866         u64 ipat = 0;
6867 
6868         
6869 
6870 
6871 
6872 
6873 
6874 
6875 
6876 
6877 
6878 
6879         if (is_mmio) {
6880                 cache = MTRR_TYPE_UNCACHABLE;
6881                 goto exit;
6882         }
6883 
6884         if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
6885                 ipat = VMX_EPT_IPAT_BIT;
6886                 cache = MTRR_TYPE_WRBACK;
6887                 goto exit;
6888         }
6889 
6890         if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
6891                 ipat = VMX_EPT_IPAT_BIT;
6892                 if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
6893                         cache = MTRR_TYPE_WRBACK;
6894                 else
6895                         cache = MTRR_TYPE_UNCACHABLE;
6896                 goto exit;
6897         }
6898 
6899         cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
6900 
6901 exit:
6902         return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat;
6903 }
6904 
6905 static int vmx_get_lpage_level(void)
6906 {
6907         if (enable_ept && !cpu_has_vmx_ept_1g_page())
6908                 return PT_DIRECTORY_LEVEL;
6909         else
6910                 
6911                 return PT_PDPE_LEVEL;
6912 }
6913 
6914 static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx)
6915 {
6916         
6917 
6918 
6919 
6920 
6921 
6922         u32 mask =
6923                 SECONDARY_EXEC_SHADOW_VMCS |
6924                 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
6925                 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
6926                 SECONDARY_EXEC_DESC;
6927 
6928         u32 new_ctl = vmx->secondary_exec_control;
6929         u32 cur_ctl = secondary_exec_controls_get(vmx);
6930 
6931         secondary_exec_controls_set(vmx, (new_ctl & ~mask) | (cur_ctl & mask));
6932 }
6933 
6934 
6935 
6936 
6937 
6938 static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
6939 {
6940         struct vcpu_vmx *vmx = to_vmx(vcpu);
6941         struct kvm_cpuid_entry2 *entry;
6942 
6943         vmx->nested.msrs.cr0_fixed1 = 0xffffffff;
6944         vmx->nested.msrs.cr4_fixed1 = X86_CR4_PCE;
6945 
6946 #define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do {            \
6947         if (entry && (entry->_reg & (_cpuid_mask)))                     \
6948                 vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask);     \
6949 } while (0)
6950 
6951         entry = kvm_find_cpuid_entry(vcpu, 0x1, 0);
6952         cr4_fixed1_update(X86_CR4_VME,        edx, bit(X86_FEATURE_VME));
6953         cr4_fixed1_update(X86_CR4_PVI,        edx, bit(X86_FEATURE_VME));
6954         cr4_fixed1_update(X86_CR4_TSD,        edx, bit(X86_FEATURE_TSC));
6955         cr4_fixed1_update(X86_CR4_DE,         edx, bit(X86_FEATURE_DE));
6956         cr4_fixed1_update(X86_CR4_PSE,        edx, bit(X86_FEATURE_PSE));
6957         cr4_fixed1_update(X86_CR4_PAE,        edx, bit(X86_FEATURE_PAE));
6958         cr4_fixed1_update(X86_CR4_MCE,        edx, bit(X86_FEATURE_MCE));
6959         cr4_fixed1_update(X86_CR4_PGE,        edx, bit(X86_FEATURE_PGE));
6960         cr4_fixed1_update(X86_CR4_OSFXSR,     edx, bit(X86_FEATURE_FXSR));
6961         cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, bit(X86_FEATURE_XMM));
6962         cr4_fixed1_update(X86_CR4_VMXE,       ecx, bit(X86_FEATURE_VMX));
6963         cr4_fixed1_update(X86_CR4_SMXE,       ecx, bit(X86_FEATURE_SMX));
6964         cr4_fixed1_update(X86_CR4_PCIDE,      ecx, bit(X86_FEATURE_PCID));
6965         cr4_fixed1_update(X86_CR4_OSXSAVE,    ecx, bit(X86_FEATURE_XSAVE));
6966 
6967         entry = kvm_find_cpuid_entry(vcpu, 0x7, 0);
6968         cr4_fixed1_update(X86_CR4_FSGSBASE,   ebx, bit(X86_FEATURE_FSGSBASE));
6969         cr4_fixed1_update(X86_CR4_SMEP,       ebx, bit(X86_FEATURE_SMEP));
6970         cr4_fixed1_update(X86_CR4_SMAP,       ebx, bit(X86_FEATURE_SMAP));
6971         cr4_fixed1_update(X86_CR4_PKE,        ecx, bit(X86_FEATURE_PKU));
6972         cr4_fixed1_update(X86_CR4_UMIP,       ecx, bit(X86_FEATURE_UMIP));
6973 
6974 #undef cr4_fixed1_update
6975 }
6976 
6977 static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
6978 {
6979         struct vcpu_vmx *vmx = to_vmx(vcpu);
6980 
6981         if (kvm_mpx_supported()) {
6982                 bool mpx_enabled = guest_cpuid_has(vcpu, X86_FEATURE_MPX);
6983 
6984                 if (mpx_enabled) {
6985                         vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
6986                         vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
6987                 } else {
6988                         vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS;
6989                         vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS;
6990                 }
6991         }
6992 }
6993 
6994 static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
6995 {
6996         struct vcpu_vmx *vmx = to_vmx(vcpu);
6997         struct kvm_cpuid_entry2 *best = NULL;
6998         int i;
6999 
7000         for (i = 0; i < PT_CPUID_LEAVES; i++) {
7001                 best = kvm_find_cpuid_entry(vcpu, 0x14, i);
7002                 if (!best)
7003                         return;
7004                 vmx->pt_desc.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM] = best->eax;
7005                 vmx->pt_desc.caps[CPUID_EBX + i*PT_CPUID_REGS_NUM] = best->ebx;
7006                 vmx->pt_desc.caps[CPUID_ECX + i*PT_CPUID_REGS_NUM] = best->ecx;
7007                 vmx->pt_desc.caps[CPUID_EDX + i*PT_CPUID_REGS_NUM] = best->edx;
7008         }
7009 
7010         
7011         vmx->pt_desc.addr_range = intel_pt_validate_cap(vmx->pt_desc.caps,
7012                                                 PT_CAP_num_address_ranges);
7013 
7014         
7015         vmx->pt_desc.ctl_bitmask = ~(RTIT_CTL_TRACEEN | RTIT_CTL_OS |
7016                         RTIT_CTL_USR | RTIT_CTL_TSC_EN | RTIT_CTL_DISRETC);
7017 
7018         
7019 
7020 
7021 
7022         if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_cr3_filtering))
7023                 vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_CR3EN;
7024 
7025         
7026 
7027 
7028 
7029         if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc))
7030                 vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_CYCLEACC |
7031                                 RTIT_CTL_CYC_THRESH | RTIT_CTL_PSB_FREQ);
7032 
7033         
7034 
7035 
7036 
7037         if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc))
7038                 vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_MTC_EN |
7039                                 RTIT_CTL_BRANCH_EN | RTIT_CTL_MTC_RANGE);
7040 
7041         
7042         if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_ptwrite))
7043                 vmx->pt_desc.ctl_bitmask &= ~(RTIT_CTL_FUP_ON_PTW |
7044                                                         RTIT_CTL_PTW_EN);
7045 
7046         
7047         if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_power_event_trace))
7048                 vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_PWR_EVT_EN;
7049 
7050         
7051         if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_topa_output))
7052                 vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_TOPA;
7053 
7054         
7055         if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_output_subsys))
7056                 vmx->pt_desc.ctl_bitmask &= ~RTIT_CTL_FABRIC_EN;
7057 
7058         
7059         for (i = 0; i < vmx->pt_desc.addr_range; i++)
7060                 vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4));
7061 }
7062 
7063 static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
7064 {
7065         struct vcpu_vmx *vmx = to_vmx(vcpu);
7066 
7067         if (cpu_has_secondary_exec_ctrls()) {
7068                 vmx_compute_secondary_exec_control(vmx);
7069                 vmcs_set_secondary_exec_control(vmx);
7070         }
7071 
7072         if (nested_vmx_allowed(vcpu))
7073                 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
7074                         FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
7075         else
7076                 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
7077                         ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
7078 
7079         if (nested_vmx_allowed(vcpu)) {
7080                 nested_vmx_cr_fixed1_bits_update(vcpu);
7081                 nested_vmx_entry_exit_ctls_update(vcpu);
7082         }
7083 
7084         if (boot_cpu_has(X86_FEATURE_INTEL_PT) &&
7085                         guest_cpuid_has(vcpu, X86_FEATURE_INTEL_PT))
7086                 update_intel_pt_cfg(vcpu);
7087 }
7088 
7089 static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
7090 {
7091         if (func == 1 && nested)
7092                 entry->ecx |= bit(X86_FEATURE_VMX);
7093 }
7094 
7095 static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
7096 {
7097         to_vmx(vcpu)->req_immediate_exit = true;
7098 }
7099 
7100 static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
7101                                   struct x86_instruction_info *info)
7102 {
7103         struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
7104         unsigned short port;
7105         bool intercept;
7106         int size;
7107 
7108         if (info->intercept == x86_intercept_in ||
7109             info->intercept == x86_intercept_ins) {
7110                 port = info->src_val;
7111                 size = info->dst_bytes;
7112         } else {
7113                 port = info->dst_val;
7114                 size = info->src_bytes;
7115         }
7116 
7117         
7118 
7119 
7120 
7121 
7122 
7123 
7124         if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
7125                 intercept = nested_cpu_has(vmcs12,
7126                                            CPU_BASED_UNCOND_IO_EXITING);
7127         else
7128                 intercept = nested_vmx_check_io_bitmaps(vcpu, port, size);
7129 
7130         
7131         return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE;
7132 }
7133 
7134 static int vmx_check_intercept(struct kvm_vcpu *vcpu,
7135                                struct x86_instruction_info *info,
7136                                enum x86_intercept_stage stage)
7137 {
7138         struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
7139         struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
7140 
7141         switch (info->intercept) {
7142         
7143 
7144 
7145 
7146         case x86_intercept_rdtscp:
7147                 if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) {
7148                         ctxt->exception.vector = UD_VECTOR;
7149                         ctxt->exception.error_code_valid = false;
7150                         return X86EMUL_PROPAGATE_FAULT;
7151                 }
7152                 break;
7153 
7154         case x86_intercept_in:
7155         case x86_intercept_ins:
7156         case x86_intercept_out:
7157         case x86_intercept_outs:
7158                 return vmx_check_intercept_io(vcpu, info);
7159 
7160         case x86_intercept_lgdt:
7161         case x86_intercept_lidt:
7162         case x86_intercept_lldt:
7163         case x86_intercept_ltr:
7164         case x86_intercept_sgdt:
7165         case x86_intercept_sidt:
7166         case x86_intercept_sldt:
7167         case x86_intercept_str:
7168                 if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC))
7169                         return X86EMUL_CONTINUE;
7170 
7171                 
7172                 break;
7173 
7174         
7175         default:
7176                 break;
7177         }
7178 
7179         return X86EMUL_UNHANDLEABLE;
7180 }
7181 
7182 #ifdef CONFIG_X86_64
7183 
7184 static inline int u64_shl_div_u64(u64 a, unsigned int shift,
7185                                   u64 divisor, u64 *result)
7186 {
7187         u64 low = a << shift, high = a >> (64 - shift);
7188 
7189         
7190         if (high >= divisor)
7191                 return 1;
7192 
7193         
7194         asm("divq %2\n\t" : "=a" (low), "=d" (high) :
7195             "rm" (divisor), "0" (low), "1" (high));
7196         *result = low;
7197 
7198         return 0;
7199 }
7200 
7201 static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
7202                             bool *expired)
7203 {
7204         struct vcpu_vmx *vmx;
7205         u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
7206         struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer;
7207 
7208         if (kvm_mwait_in_guest(vcpu->kvm) ||
7209                 kvm_can_post_timer_interrupt(vcpu))
7210                 return -EOPNOTSUPP;
7211 
7212         vmx = to_vmx(vcpu);
7213         tscl = rdtsc();
7214         guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
7215         delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
7216         lapic_timer_advance_cycles = nsec_to_cycles(vcpu,
7217                                                     ktimer->timer_advance_ns);
7218 
7219         if (delta_tsc > lapic_timer_advance_cycles)
7220                 delta_tsc -= lapic_timer_advance_cycles;
7221         else
7222                 delta_tsc = 0;
7223 
7224         
7225         if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
7226             delta_tsc && u64_shl_div_u64(delta_tsc,
7227                                 kvm_tsc_scaling_ratio_frac_bits,
7228                                 vcpu->arch.tsc_scaling_ratio, &delta_tsc))
7229                 return -ERANGE;
7230 
7231         
7232 
7233 
7234 
7235 
7236 
7237         if (delta_tsc >> (cpu_preemption_timer_multi + 32))
7238                 return -ERANGE;
7239 
7240         vmx->hv_deadline_tsc = tscl + delta_tsc;
7241         *expired = !delta_tsc;
7242         return 0;
7243 }
7244 
7245 static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
7246 {
7247         to_vmx(vcpu)->hv_deadline_tsc = -1;
7248 }
7249 #endif
7250 
7251 static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
7252 {
7253         if (!kvm_pause_in_guest(vcpu->kvm))
7254                 shrink_ple_window(vcpu);
7255 }
7256 
7257 static void vmx_slot_enable_log_dirty(struct kvm *kvm,
7258                                      struct kvm_memory_slot *slot)
7259 {
7260         kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
7261         kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
7262 }
7263 
7264 static void vmx_slot_disable_log_dirty(struct kvm *kvm,
7265                                        struct kvm_memory_slot *slot)
7266 {
7267         kvm_mmu_slot_set_dirty(kvm, slot);
7268 }
7269 
7270 static void vmx_flush_log_dirty(struct kvm *kvm)
7271 {
7272         kvm_flush_pml_buffers(kvm);
7273 }
7274 
7275 static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
7276 {
7277         struct vmcs12 *vmcs12;
7278         struct vcpu_vmx *vmx = to_vmx(vcpu);
7279         gpa_t gpa, dst;
7280 
7281         if (is_guest_mode(vcpu)) {
7282                 WARN_ON_ONCE(vmx->nested.pml_full);
7283 
7284                 
7285 
7286 
7287 
7288 
7289                 vmcs12 = get_vmcs12(vcpu);
7290                 if (!nested_cpu_has_pml(vmcs12))
7291                         return 0;
7292 
7293                 if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) {
7294                         vmx->nested.pml_full = true;
7295                         return 1;
7296                 }
7297 
7298                 gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull;
7299                 dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index;
7300 
7301                 if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa,
7302                                          offset_in_page(dst), sizeof(gpa)))
7303                         return 0;
7304 
7305                 vmcs12->guest_pml_index--;
7306         }
7307 
7308         return 0;
7309 }
7310 
7311 static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
7312                                            struct kvm_memory_slot *memslot,
7313                                            gfn_t offset, unsigned long mask)
7314 {
7315         kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
7316 }
7317 
7318 static void __pi_post_block(struct kvm_vcpu *vcpu)
7319 {
7320         struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
7321         struct pi_desc old, new;
7322         unsigned int dest;
7323 
7324         do {
7325                 old.control = new.control = pi_desc->control;
7326                 WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
7327                      "Wakeup handler not enabled while the VCPU is blocked\n");
7328 
7329                 dest = cpu_physical_id(vcpu->cpu);
7330 
7331                 if (x2apic_enabled())
7332                         new.ndst = dest;
7333                 else
7334                         new.ndst = (dest << 8) & 0xFF00;
7335 
7336                 
7337                 new.nv = POSTED_INTR_VECTOR;
7338         } while (cmpxchg64(&pi_desc->control, old.control,
7339                            new.control) != old.control);
7340 
7341         if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
7342                 spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
7343                 list_del(&vcpu->blocked_vcpu_list);
7344                 spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
7345                 vcpu->pre_pcpu = -1;
7346         }
7347 }
7348 
7349 
7350 
7351 
7352 
7353 
7354 
7355 
7356 
7357 
7358 
7359 
7360 
7361 
7362 static int pi_pre_block(struct kvm_vcpu *vcpu)
7363 {
7364         unsigned int dest;
7365         struct pi_desc old, new;
7366         struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
7367 
7368         if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
7369                 !irq_remapping_cap(IRQ_POSTING_CAP)  ||
7370                 !kvm_vcpu_apicv_active(vcpu))
7371                 return 0;
7372 
7373         WARN_ON(irqs_disabled());
7374         local_irq_disable();
7375         if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
7376                 vcpu->pre_pcpu = vcpu->cpu;
7377                 spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
7378                 list_add_tail(&vcpu->blocked_vcpu_list,
7379                               &per_cpu(blocked_vcpu_on_cpu,
7380                                        vcpu->pre_pcpu));
7381                 spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
7382         }
7383 
7384         do {
7385                 old.control = new.control = pi_desc->control;
7386 
7387                 WARN((pi_desc->sn == 1),
7388                      "Warning: SN field of posted-interrupts "
7389                      "is set before blocking\n");
7390 
7391                 
7392 
7393 
7394 
7395 
7396 
7397 
7398 
7399                 dest = cpu_physical_id(vcpu->pre_pcpu);
7400 
7401                 if (x2apic_enabled())
7402                         new.ndst = dest;
7403                 else
7404                         new.ndst = (dest << 8) & 0xFF00;
7405 
7406                 
7407                 new.nv = POSTED_INTR_WAKEUP_VECTOR;
7408         } while (cmpxchg64(&pi_desc->control, old.control,
7409                            new.control) != old.control);
7410 
7411         
7412         if (pi_test_on(pi_desc) == 1)
7413                 __pi_post_block(vcpu);
7414 
7415         local_irq_enable();
7416         return (vcpu->pre_pcpu == -1);
7417 }
7418 
7419 static int vmx_pre_block(struct kvm_vcpu *vcpu)
7420 {
7421         if (pi_pre_block(vcpu))
7422                 return 1;
7423 
7424         if (kvm_lapic_hv_timer_in_use(vcpu))
7425                 kvm_lapic_switch_to_sw_timer(vcpu);
7426 
7427         return 0;
7428 }
7429 
7430 static void pi_post_block(struct kvm_vcpu *vcpu)
7431 {
7432         if (vcpu->pre_pcpu == -1)
7433                 return;
7434 
7435         WARN_ON(irqs_disabled());
7436         local_irq_disable();
7437         __pi_post_block(vcpu);
7438         local_irq_enable();
7439 }
7440 
7441 static void vmx_post_block(struct kvm_vcpu *vcpu)
7442 {
7443         if (kvm_x86_ops->set_hv_timer)
7444                 kvm_lapic_switch_to_hv_timer(vcpu);
7445 
7446         pi_post_block(vcpu);
7447 }
7448 
7449 
7450 
7451 
7452 
7453 
7454 
7455 
7456 
7457 
7458 static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
7459                               uint32_t guest_irq, bool set)
7460 {
7461         struct kvm_kernel_irq_routing_entry *e;
7462         struct kvm_irq_routing_table *irq_rt;
7463         struct kvm_lapic_irq irq;
7464         struct kvm_vcpu *vcpu;
7465         struct vcpu_data vcpu_info;
7466         int idx, ret = 0;
7467 
7468         if (!kvm_arch_has_assigned_device(kvm) ||
7469                 !irq_remapping_cap(IRQ_POSTING_CAP) ||
7470                 !kvm_vcpu_apicv_active(kvm->vcpus[0]))
7471                 return 0;
7472 
7473         idx = srcu_read_lock(&kvm->irq_srcu);
7474         irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
7475         if (guest_irq >= irq_rt->nr_rt_entries ||
7476             hlist_empty(&irq_rt->map[guest_irq])) {
7477                 pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
7478                              guest_irq, irq_rt->nr_rt_entries);
7479                 goto out;
7480         }
7481 
7482         hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
7483                 if (e->type != KVM_IRQ_ROUTING_MSI)
7484                         continue;
7485                 
7486 
7487 
7488 
7489 
7490 
7491 
7492 
7493 
7494 
7495 
7496 
7497 
7498 
7499 
7500 
7501                 kvm_set_msi_irq(kvm, e, &irq);
7502                 if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
7503                     !kvm_irq_is_postable(&irq)) {
7504                         
7505 
7506 
7507 
7508                         ret = irq_set_vcpu_affinity(host_irq, NULL);
7509                         if (ret < 0) {
7510                                 printk(KERN_INFO
7511                                    "failed to back to remapped mode, irq: %u\n",
7512                                    host_irq);
7513                                 goto out;
7514                         }
7515 
7516                         continue;
7517                 }
7518 
7519                 vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
7520                 vcpu_info.vector = irq.vector;
7521 
7522                 trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi,
7523                                 vcpu_info.vector, vcpu_info.pi_desc_addr, set);
7524 
7525                 if (set)
7526                         ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
7527                 else
7528                         ret = irq_set_vcpu_affinity(host_irq, NULL);
7529 
7530                 if (ret < 0) {
7531                         printk(KERN_INFO "%s: failed to update PI IRTE\n",
7532                                         __func__);
7533                         goto out;
7534                 }
7535         }
7536 
7537         ret = 0;
7538 out:
7539         srcu_read_unlock(&kvm->irq_srcu, idx);
7540         return ret;
7541 }
7542 
7543 static void vmx_setup_mce(struct kvm_vcpu *vcpu)
7544 {
7545         if (vcpu->arch.mcg_cap & MCG_LMCE_P)
7546                 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
7547                         FEATURE_CONTROL_LMCE;
7548         else
7549                 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
7550                         ~FEATURE_CONTROL_LMCE;
7551 }
7552 
7553 static int vmx_smi_allowed(struct kvm_vcpu *vcpu)
7554 {
7555         
7556         if (to_vmx(vcpu)->nested.nested_run_pending)
7557                 return 0;
7558         return 1;
7559 }
7560 
7561 static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
7562 {
7563         struct vcpu_vmx *vmx = to_vmx(vcpu);
7564 
7565         vmx->nested.smm.guest_mode = is_guest_mode(vcpu);
7566         if (vmx->nested.smm.guest_mode)
7567                 nested_vmx_vmexit(vcpu, -1, 0, 0);
7568 
7569         vmx->nested.smm.vmxon = vmx->nested.vmxon;
7570         vmx->nested.vmxon = false;
7571         vmx_clear_hlt(vcpu);
7572         return 0;
7573 }
7574 
7575 static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
7576 {
7577         struct vcpu_vmx *vmx = to_vmx(vcpu);
7578         int ret;
7579 
7580         if (vmx->nested.smm.vmxon) {
7581                 vmx->nested.vmxon = true;
7582                 vmx->nested.smm.vmxon = false;
7583         }
7584 
7585         if (vmx->nested.smm.guest_mode) {
7586                 ret = nested_vmx_enter_non_root_mode(vcpu, false);
7587                 if (ret)
7588                         return ret;
7589 
7590                 vmx->nested.smm.guest_mode = false;
7591         }
7592         return 0;
7593 }
7594 
7595 static int enable_smi_window(struct kvm_vcpu *vcpu)
7596 {
7597         return 0;
7598 }
7599 
7600 static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
7601 {
7602         return false;
7603 }
7604 
7605 static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
7606 {
7607         return to_vmx(vcpu)->nested.vmxon;
7608 }
7609 
7610 static __init int hardware_setup(void)
7611 {
7612         unsigned long host_bndcfgs;
7613         struct desc_ptr dt;
7614         int r, i;
7615 
7616         rdmsrl_safe(MSR_EFER, &host_efer);
7617 
7618         store_idt(&dt);
7619         host_idt_base = dt.address;
7620 
7621         for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
7622                 kvm_define_shared_msr(i, vmx_msr_index[i]);
7623 
7624         if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)
7625                 return -EIO;
7626 
7627         if (boot_cpu_has(X86_FEATURE_NX))
7628                 kvm_enable_efer_bits(EFER_NX);
7629 
7630         if (boot_cpu_has(X86_FEATURE_MPX)) {
7631                 rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs);
7632                 WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost");
7633         }
7634 
7635         if (boot_cpu_has(X86_FEATURE_XSAVES))
7636                 rdmsrl(MSR_IA32_XSS, host_xss);
7637 
7638         if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
7639             !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
7640                 enable_vpid = 0;
7641 
7642         if (!cpu_has_vmx_ept() ||
7643             !cpu_has_vmx_ept_4levels() ||
7644             !cpu_has_vmx_ept_mt_wb() ||
7645             !cpu_has_vmx_invept_global())
7646                 enable_ept = 0;
7647 
7648         if (!cpu_has_vmx_ept_ad_bits() || !enable_ept)
7649                 enable_ept_ad_bits = 0;
7650 
7651         if (!cpu_has_vmx_unrestricted_guest() || !enable_ept)
7652                 enable_unrestricted_guest = 0;
7653 
7654         if (!cpu_has_vmx_flexpriority())
7655                 flexpriority_enabled = 0;
7656 
7657         if (!cpu_has_virtual_nmis())
7658                 enable_vnmi = 0;
7659 
7660         
7661 
7662 
7663 
7664 
7665         if (!flexpriority_enabled)
7666                 kvm_x86_ops->set_apic_access_page_addr = NULL;
7667 
7668         if (!cpu_has_vmx_tpr_shadow())
7669                 kvm_x86_ops->update_cr8_intercept = NULL;
7670 
7671         if (enable_ept && !cpu_has_vmx_ept_2m_page())
7672                 kvm_disable_largepages();
7673 
7674 #if IS_ENABLED(CONFIG_HYPERV)
7675         if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH
7676             && enable_ept) {
7677                 kvm_x86_ops->tlb_remote_flush = hv_remote_flush_tlb;
7678                 kvm_x86_ops->tlb_remote_flush_with_range =
7679                                 hv_remote_flush_tlb_with_range;
7680         }
7681 #endif
7682 
7683         if (!cpu_has_vmx_ple()) {
7684                 ple_gap = 0;
7685                 ple_window = 0;
7686                 ple_window_grow = 0;
7687                 ple_window_max = 0;
7688                 ple_window_shrink = 0;
7689         }
7690 
7691         if (!cpu_has_vmx_apicv()) {
7692                 enable_apicv = 0;
7693                 kvm_x86_ops->sync_pir_to_irr = NULL;
7694         }
7695 
7696         if (cpu_has_vmx_tsc_scaling()) {
7697                 kvm_has_tsc_control = true;
7698                 kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX;
7699                 kvm_tsc_scaling_ratio_frac_bits = 48;
7700         }
7701 
7702         set_bit(0, vmx_vpid_bitmap); 
7703 
7704         if (enable_ept)
7705                 vmx_enable_tdp();
7706         else
7707                 kvm_disable_tdp();
7708 
7709         
7710 
7711 
7712 
7713         if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml())
7714                 enable_pml = 0;
7715 
7716         if (!enable_pml) {
7717                 kvm_x86_ops->slot_enable_log_dirty = NULL;
7718                 kvm_x86_ops->slot_disable_log_dirty = NULL;
7719                 kvm_x86_ops->flush_log_dirty = NULL;
7720                 kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
7721         }
7722 
7723         if (!cpu_has_vmx_preemption_timer())
7724                 enable_preemption_timer = false;
7725 
7726         if (enable_preemption_timer) {
7727                 u64 use_timer_freq = 5000ULL * 1000 * 1000;
7728                 u64 vmx_msr;
7729 
7730                 rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
7731                 cpu_preemption_timer_multi =
7732                         vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
7733 
7734                 if (tsc_khz)
7735                         use_timer_freq = (u64)tsc_khz * 1000;
7736                 use_timer_freq >>= cpu_preemption_timer_multi;
7737 
7738                 
7739 
7740 
7741 
7742 
7743                 if (use_timer_freq > 0xffffffffu / 10)
7744                         enable_preemption_timer = false;
7745         }
7746 
7747         if (!enable_preemption_timer) {
7748                 kvm_x86_ops->set_hv_timer = NULL;
7749                 kvm_x86_ops->cancel_hv_timer = NULL;
7750                 kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit;
7751         }
7752 
7753         kvm_set_posted_intr_wakeup_handler(wakeup_handler);
7754 
7755         kvm_mce_cap_supported |= MCG_LMCE_P;
7756 
7757         if (pt_mode != PT_MODE_SYSTEM && pt_mode != PT_MODE_HOST_GUEST)
7758                 return -EINVAL;
7759         if (!enable_ept || !cpu_has_vmx_intel_pt())
7760                 pt_mode = PT_MODE_SYSTEM;
7761 
7762         if (nested) {
7763                 nested_vmx_setup_ctls_msrs(&vmcs_config.nested,
7764                                            vmx_capability.ept);
7765 
7766                 r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers);
7767                 if (r)
7768                         return r;
7769         }
7770 
7771         r = alloc_kvm_area();
7772         if (r)
7773                 nested_vmx_hardware_unsetup();
7774         return r;
7775 }
7776 
7777 static __exit void hardware_unsetup(void)
7778 {
7779         if (nested)
7780                 nested_vmx_hardware_unsetup();
7781 
7782         free_kvm_area();
7783 }
7784 
7785 static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
7786         .cpu_has_kvm_support = cpu_has_kvm_support,
7787         .disabled_by_bios = vmx_disabled_by_bios,
7788         .hardware_setup = hardware_setup,
7789         .hardware_unsetup = hardware_unsetup,
7790         .check_processor_compatibility = vmx_check_processor_compat,
7791         .hardware_enable = hardware_enable,
7792         .hardware_disable = hardware_disable,
7793         .cpu_has_accelerated_tpr = report_flexpriority,
7794         .has_emulated_msr = vmx_has_emulated_msr,
7795 
7796         .vm_init = vmx_vm_init,
7797         .vm_alloc = vmx_vm_alloc,
7798         .vm_free = vmx_vm_free,
7799 
7800         .vcpu_create = vmx_create_vcpu,
7801         .vcpu_free = vmx_free_vcpu,
7802         .vcpu_reset = vmx_vcpu_reset,
7803 
7804         .prepare_guest_switch = vmx_prepare_switch_to_guest,
7805         .vcpu_load = vmx_vcpu_load,
7806         .vcpu_put = vmx_vcpu_put,
7807 
7808         .update_bp_intercept = update_exception_bitmap,
7809         .get_msr_feature = vmx_get_msr_feature,
7810         .get_msr = vmx_get_msr,
7811         .set_msr = vmx_set_msr,
7812         .get_segment_base = vmx_get_segment_base,
7813         .get_segment = vmx_get_segment,
7814         .set_segment = vmx_set_segment,
7815         .get_cpl = vmx_get_cpl,
7816         .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
7817         .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
7818         .decache_cr3 = vmx_decache_cr3,
7819         .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
7820         .set_cr0 = vmx_set_cr0,
7821         .set_cr3 = vmx_set_cr3,
7822         .set_cr4 = vmx_set_cr4,
7823         .set_efer = vmx_set_efer,
7824         .get_idt = vmx_get_idt,
7825         .set_idt = vmx_set_idt,
7826         .get_gdt = vmx_get_gdt,
7827         .set_gdt = vmx_set_gdt,
7828         .get_dr6 = vmx_get_dr6,
7829         .set_dr6 = vmx_set_dr6,
7830         .set_dr7 = vmx_set_dr7,
7831         .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
7832         .cache_reg = vmx_cache_reg,
7833         .get_rflags = vmx_get_rflags,
7834         .set_rflags = vmx_set_rflags,
7835 
7836         .tlb_flush = vmx_flush_tlb,
7837         .tlb_flush_gva = vmx_flush_tlb_gva,
7838 
7839         .run = vmx_vcpu_run,
7840         .handle_exit = vmx_handle_exit,
7841         .skip_emulated_instruction = skip_emulated_instruction,
7842         .set_interrupt_shadow = vmx_set_interrupt_shadow,
7843         .get_interrupt_shadow = vmx_get_interrupt_shadow,
7844         .patch_hypercall = vmx_patch_hypercall,
7845         .set_irq = vmx_inject_irq,
7846         .set_nmi = vmx_inject_nmi,
7847         .queue_exception = vmx_queue_exception,
7848         .cancel_injection = vmx_cancel_injection,
7849         .interrupt_allowed = vmx_interrupt_allowed,
7850         .nmi_allowed = vmx_nmi_allowed,
7851         .get_nmi_mask = vmx_get_nmi_mask,
7852         .set_nmi_mask = vmx_set_nmi_mask,
7853         .enable_nmi_window = enable_nmi_window,
7854         .enable_irq_window = enable_irq_window,
7855         .update_cr8_intercept = update_cr8_intercept,
7856         .set_virtual_apic_mode = vmx_set_virtual_apic_mode,
7857         .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
7858         .get_enable_apicv = vmx_get_enable_apicv,
7859         .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
7860         .load_eoi_exitmap = vmx_load_eoi_exitmap,
7861         .apicv_post_state_restore = vmx_apicv_post_state_restore,
7862         .hwapic_irr_update = vmx_hwapic_irr_update,
7863         .hwapic_isr_update = vmx_hwapic_isr_update,
7864         .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
7865         .sync_pir_to_irr = vmx_sync_pir_to_irr,
7866         .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
7867         .dy_apicv_has_pending_interrupt = vmx_dy_apicv_has_pending_interrupt,
7868 
7869         .set_tss_addr = vmx_set_tss_addr,
7870         .set_identity_map_addr = vmx_set_identity_map_addr,
7871         .get_tdp_level = get_ept_level,
7872         .get_mt_mask = vmx_get_mt_mask,
7873 
7874         .get_exit_info = vmx_get_exit_info,
7875 
7876         .get_lpage_level = vmx_get_lpage_level,
7877 
7878         .cpuid_update = vmx_cpuid_update,
7879 
7880         .rdtscp_supported = vmx_rdtscp_supported,
7881         .invpcid_supported = vmx_invpcid_supported,
7882 
7883         .set_supported_cpuid = vmx_set_supported_cpuid,
7884 
7885         .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
7886 
7887         .read_l1_tsc_offset = vmx_read_l1_tsc_offset,
7888         .write_l1_tsc_offset = vmx_write_l1_tsc_offset,
7889 
7890         .set_tdp_cr3 = vmx_set_cr3,
7891 
7892         .check_intercept = vmx_check_intercept,
7893         .handle_exit_irqoff = vmx_handle_exit_irqoff,
7894         .mpx_supported = vmx_mpx_supported,
7895         .xsaves_supported = vmx_xsaves_supported,
7896         .umip_emulated = vmx_umip_emulated,
7897         .pt_supported = vmx_pt_supported,
7898         .pku_supported = vmx_pku_supported,
7899 
7900         .request_immediate_exit = vmx_request_immediate_exit,
7901 
7902         .sched_in = vmx_sched_in,
7903 
7904         .slot_enable_log_dirty = vmx_slot_enable_log_dirty,
7905         .slot_disable_log_dirty = vmx_slot_disable_log_dirty,
7906         .flush_log_dirty = vmx_flush_log_dirty,
7907         .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
7908         .write_log_dirty = vmx_write_pml_buffer,
7909 
7910         .pre_block = vmx_pre_block,
7911         .post_block = vmx_post_block,
7912 
7913         .pmu_ops = &intel_pmu_ops,
7914 
7915         .update_pi_irte = vmx_update_pi_irte,
7916 
7917 #ifdef CONFIG_X86_64
7918         .set_hv_timer = vmx_set_hv_timer,
7919         .cancel_hv_timer = vmx_cancel_hv_timer,
7920 #endif
7921 
7922         .setup_mce = vmx_setup_mce,
7923 
7924         .smi_allowed = vmx_smi_allowed,
7925         .pre_enter_smm = vmx_pre_enter_smm,
7926         .pre_leave_smm = vmx_pre_leave_smm,
7927         .enable_smi_window = enable_smi_window,
7928 
7929         .check_nested_events = NULL,
7930         .get_nested_state = NULL,
7931         .set_nested_state = NULL,
7932         .get_vmcs12_pages = NULL,
7933         .nested_enable_evmcs = NULL,
7934         .nested_get_evmcs_version = NULL,
7935         .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault,
7936         .apic_init_signal_blocked = vmx_apic_init_signal_blocked,
7937 };
7938 
7939 static void vmx_cleanup_l1d_flush(void)
7940 {
7941         if (vmx_l1d_flush_pages) {
7942                 free_pages((unsigned long)vmx_l1d_flush_pages, L1D_CACHE_ORDER);
7943                 vmx_l1d_flush_pages = NULL;
7944         }
7945         
7946         l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
7947 }
7948 
7949 static void vmx_exit(void)
7950 {
7951 #ifdef CONFIG_KEXEC_CORE
7952         RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
7953         synchronize_rcu();
7954 #endif
7955 
7956         kvm_exit();
7957 
7958 #if IS_ENABLED(CONFIG_HYPERV)
7959         if (static_branch_unlikely(&enable_evmcs)) {
7960                 int cpu;
7961                 struct hv_vp_assist_page *vp_ap;
7962                 
7963 
7964 
7965 
7966 
7967                 for_each_online_cpu(cpu) {
7968                         vp_ap = hv_get_vp_assist_page(cpu);
7969 
7970                         if (!vp_ap)
7971                                 continue;
7972 
7973                         vp_ap->nested_control.features.directhypercall = 0;
7974                         vp_ap->current_nested_vmcs = 0;
7975                         vp_ap->enlighten_vmentry = 0;
7976                 }
7977 
7978                 static_branch_disable(&enable_evmcs);
7979         }
7980 #endif
7981         vmx_cleanup_l1d_flush();
7982 }
7983 module_exit(vmx_exit);
7984 
7985 static int __init vmx_init(void)
7986 {
7987         int r, cpu;
7988 
7989 #if IS_ENABLED(CONFIG_HYPERV)
7990         
7991 
7992 
7993 
7994 
7995         if (enlightened_vmcs &&
7996             ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED &&
7997             (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >=
7998             KVM_EVMCS_VERSION) {
7999                 int cpu;
8000 
8001                 
8002                 for_each_online_cpu(cpu) {
8003                         if (!hv_get_vp_assist_page(cpu)) {
8004                                 enlightened_vmcs = false;
8005                                 break;
8006                         }
8007                 }
8008 
8009                 if (enlightened_vmcs) {
8010                         pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n");
8011                         static_branch_enable(&enable_evmcs);
8012                 }
8013 
8014                 if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH)
8015                         vmx_x86_ops.enable_direct_tlbflush
8016                                 = hv_enable_direct_tlbflush;
8017 
8018         } else {
8019                 enlightened_vmcs = false;
8020         }
8021 #endif
8022 
8023         r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
8024                      __alignof__(struct vcpu_vmx), THIS_MODULE);
8025         if (r)
8026                 return r;
8027 
8028         
8029 
8030 
8031 
8032 
8033 
8034 
8035         r = vmx_setup_l1d_flush(vmentry_l1d_flush_param);
8036         if (r) {
8037                 vmx_exit();
8038                 return r;
8039         }
8040 
8041         for_each_possible_cpu(cpu) {
8042                 INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
8043                 INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
8044                 spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
8045         }
8046 
8047 #ifdef CONFIG_KEXEC_CORE
8048         rcu_assign_pointer(crash_vmclear_loaded_vmcss,
8049                            crash_vmclear_local_loaded_vmcss);
8050 #endif
8051         vmx_check_vmcs12_offsets();
8052 
8053         return 0;
8054 }
8055 module_init(vmx_init);