/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */
#include <linux/kvm_host.h>

#include "irq.h"
#include "mmu.h"
#include "kvm_cache_regs.h"
#include "x86.h"
#include "cpuid.h"
#include "pmu.h"

#include <linux/module.h>
#include <linux/mod_devicetable.h>
#include <linux/kernel.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/trace_events.h>
#include <linux/slab.h>

#include <asm/perf_event.h>
#include <asm/tlbflush.h>
#include <asm/desc.h>
#include <asm/debugreg.h>
#include <asm/kvm_para.h>

#include <asm/virtext.h>
#include "trace.h"

#define __ex(x) __kvm_handle_fault_on_reboot(x)

MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");

static const struct x86_cpu_id svm_cpu_id[] = {
	X86_FEATURE_MATCH(X86_FEATURE_SVM),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);

#define IOPM_ALLOC_ORDER 2
#define MSRPM_ALLOC_ORDER 1

#define SEG_TYPE_LDT 2
#define SEG_TYPE_BUSY_TSS16 3

#define SVM_FEATURE_NPT            (1 <<  0)
#define SVM_FEATURE_LBRV           (1 <<  1)
#define SVM_FEATURE_SVML           (1 <<  2)
#define SVM_FEATURE_NRIP           (1 <<  3)
#define SVM_FEATURE_TSC_RATE       (1 <<  4)
#define SVM_FEATURE_VMCB_CLEAN     (1 <<  5)
#define SVM_FEATURE_FLUSH_ASID     (1 <<  6)
#define SVM_FEATURE_DECODE_ASSIST  (1 <<  7)
#define SVM_FEATURE_PAUSE_FILTER   (1 << 10)

#define NESTED_EXIT_HOST	0	/* Exit handled on host level */
#define NESTED_EXIT_DONE	1	/* Exit caused nested vmexit  */
#define NESTED_EXIT_CONTINUE	2	/* Further checks needed      */

#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))

#define TSC_RATIO_RSVD	0xffffff0000000000ULL
#define TSC_RATIO_MIN	0x0000000000000001ULL
#define TSC_RATIO_MAX	0x000000ffffffffffULL

static bool erratum_383_found __read_mostly;

static const u32 host_save_user_msrs[] = {
#ifdef CONFIG_X86_64
	MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
	MSR_FS_BASE,
#endif
	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
};

#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)

struct kvm_vcpu;

struct nested_state {
	struct vmcb *hsave;
	u64 hsave_msr;
	u64 vm_cr_msr;
	u64 vmcb;

	/* These are the merged vectors */
	u32 *msrpm;

	/* gpa pointers to the real vectors */
	u64 vmcb_msrpm;
	u64 vmcb_iopm;

	/* A VMEXIT is required but not yet emulated */
	bool exit_required;

	/* cache for intercepts of the guest */
	u32 intercept_cr;
	u32 intercept_dr;
	u32 intercept_exceptions;
	u64 intercept;

	/* Nested Paging related state */
	u64 nested_cr3;
};

#define MSRPM_OFFSETS	16
static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;

/*
 * Set osvw_len to higher value when updated Revision Guides
 * are published and we know what the new status bits are
 */
static uint64_t osvw_len = 4, osvw_status;

struct vcpu_svm {
	struct kvm_vcpu vcpu;
	struct vmcb *vmcb;
	unsigned long vmcb_pa;
	struct svm_cpu_data *svm_data;
	uint64_t asid_generation;
	uint64_t sysenter_esp;
	uint64_t sysenter_eip;

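	/*
	 * RIP of the instruction following the one being executed; filled
	 * from the VMCB's next_rip field when the CPU provides it (NRIPS),
	 * otherwise computed by the instruction emulator (see
	 * skip_emulated_instruction()).
	 */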
	u64 next_rip;

	u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
	struct {
		u16 fs;
		u16 gs;
		u16 ldt;
		u64 gs_base;
	} host;

	u32 *msrpm;

	ulong nmi_iret_rip;

	struct nested_state nested;

	bool nmi_singlestep;

	unsigned int3_injected;
	unsigned long int3_rip;
	u32 apf_reason;

	/* cached guest cpuid flags for faster access */
	bool nrips_enabled	: 1;
};

static DEFINE_PER_CPU(u64, current_tsc_ratio);
#define TSC_RATIO_DEFAULT	0x0100000000ULL

#define MSR_INVALID	0xffffffffU

static const struct svm_direct_access_msrs {
	u32 index;   /* Index of the MSR */
	bool always; /* True if intercept is always on */
} direct_access_msrs[] = {
	{ .index = MSR_STAR,			.always = true  },
	{ .index = MSR_IA32_SYSENTER_CS,	.always = true  },
#ifdef CONFIG_X86_64
	{ .index = MSR_GS_BASE,			.always = true  },
	{ .index = MSR_FS_BASE,			.always = true  },
	{ .index = MSR_KERNEL_GS_BASE,		.always = true  },
	{ .index = MSR_LSTAR,			.always = true  },
	{ .index = MSR_CSTAR,			.always = true  },
	{ .index = MSR_SYSCALL_MASK,		.always = true  },
#endif
	{ .index = MSR_IA32_LASTBRANCHFROMIP,	.always = false },
	{ .index = MSR_IA32_LASTBRANCHTOIP,	.always = false },
	{ .index = MSR_IA32_LASTINTFROMIP,	.always = false },
	{ .index = MSR_IA32_LASTINTTOIP,	.always = false },
	{ .index = MSR_INVALID,			.always = false },
};

/* enable NPT for AMD64 and X86 with PAE */
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
static bool npt_enabled = true;
#else
static bool npt_enabled;
#endif

/* allow nested paging (virtualized MMU) for all guests */
static int npt = true;
module_param(npt, int, S_IRUGO);

/* allow nested virtualization in KVM/SVM */
static int nested = true;
module_param(nested, int, S_IRUGO);

static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
static void svm_flush_tlb(struct kvm_vcpu *vcpu);
static void svm_complete_interrupts(struct vcpu_svm *svm);

static int nested_svm_exit_handled(struct vcpu_svm *svm);
static int nested_svm_intercept(struct vcpu_svm *svm);
static int nested_svm_vmexit(struct vcpu_svm *svm);
static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
				      bool has_error_code, u32 error_code);

enum {
	VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
			    pause filter count */
	VMCB_PERM_MAP,   /* IOPM Base and MSRPM Base */
	VMCB_ASID,	 /* ASID */
	VMCB_INTR,	 /* int_ctl, int_vector */
	VMCB_NPT,	 /* npt_en, nCR3, gPAT */
	VMCB_CR,	 /* CR0, CR3, CR4, EFER */
	VMCB_DR,	 /* DR6, DR7 */
	VMCB_DT,	 /* GDT, IDT */
	VMCB_SEG,	 /* CS, DS, SS, ES, CPL */
	VMCB_CR2,	 /* CR2 only */
	VMCB_LBR,	 /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
	VMCB_DIRTY_MAX,
};

/* TPR and CR2 are always written before VMRUN */
#define VMCB_ALWAYS_DIRTY_MASK	((1U << VMCB_INTR) | (1U << VMCB_CR2))

static inline void mark_all_dirty(struct vmcb *vmcb)
{
	vmcb->control.clean = 0;
}

static inline void mark_all_clean(struct vmcb *vmcb)
{
	vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
			       & ~VMCB_ALWAYS_DIRTY_MASK;
}

static inline void mark_dirty(struct vmcb *vmcb, int bit)
{
	vmcb->control.clean &= ~(1 << bit);
}

static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
{
	return container_of(vcpu, struct vcpu_svm, vcpu);
}

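/*
 * While the vcpu is in guest mode, the active VMCB has to intercept
 * everything that either L1 (cached in svm->nested.hsave) or L2
 * (cached in svm->nested) wants intercepted, so the effective masks
 * are the union of both sets.
 */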
static void recalc_intercepts(struct vcpu_svm *svm)
{
	struct vmcb_control_area *c, *h;
	struct nested_state *g;

	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);

	if (!is_guest_mode(&svm->vcpu))
		return;

	c = &svm->vmcb->control;
	h = &svm->nested.hsave->control;
	g = &svm->nested;

	c->intercept_cr = h->intercept_cr | g->intercept_cr;
	c->intercept_dr = h->intercept_dr | g->intercept_dr;
	c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
	c->intercept = h->intercept | g->intercept;
}

static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
{
	if (is_guest_mode(&svm->vcpu))
		return svm->nested.hsave;
	else
		return svm->vmcb;
}

static inline void set_cr_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_cr |= (1U << bit);

	recalc_intercepts(svm);
}

static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_cr &= ~(1U << bit);

	recalc_intercepts(svm);
}

static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	return vmcb->control.intercept_cr & (1U << bit);
}

static inline void set_dr_intercepts(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ)
		| (1 << INTERCEPT_DR1_READ)
		| (1 << INTERCEPT_DR2_READ)
		| (1 << INTERCEPT_DR3_READ)
		| (1 << INTERCEPT_DR4_READ)
		| (1 << INTERCEPT_DR5_READ)
		| (1 << INTERCEPT_DR6_READ)
		| (1 << INTERCEPT_DR7_READ)
		| (1 << INTERCEPT_DR0_WRITE)
		| (1 << INTERCEPT_DR1_WRITE)
		| (1 << INTERCEPT_DR2_WRITE)
		| (1 << INTERCEPT_DR3_WRITE)
		| (1 << INTERCEPT_DR4_WRITE)
		| (1 << INTERCEPT_DR5_WRITE)
		| (1 << INTERCEPT_DR6_WRITE)
		| (1 << INTERCEPT_DR7_WRITE);

	recalc_intercepts(svm);
}

static inline void clr_dr_intercepts(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_dr = 0;

	recalc_intercepts(svm);
}

static inline void set_exception_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_exceptions |= (1U << bit);

	recalc_intercepts(svm);
}

static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_exceptions &= ~(1U << bit);

	recalc_intercepts(svm);
}

static inline void set_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept |= (1ULL << bit);

	recalc_intercepts(svm);
}

static inline void clr_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept &= ~(1ULL << bit);

	recalc_intercepts(svm);
}

static inline void enable_gif(struct vcpu_svm *svm)
{
	svm->vcpu.arch.hflags |= HF_GIF_MASK;
}

static inline void disable_gif(struct vcpu_svm *svm)
{
	svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
}

static inline bool gif_set(struct vcpu_svm *svm)
{
	return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
}

static unsigned long iopm_base;

struct kvm_ldttss_desc {
	u16 limit0;
	u16 base0;
	unsigned base1:8, type:5, dpl:2, p:1;
	unsigned limit1:4, zero0:3, g:1, base2:8;
	u32 base3;
	u32 zero1;
} __attribute__((packed));

struct svm_cpu_data {
	int cpu;

	u64 asid_generation;
	u32 max_asid;
	u32 next_asid;
	struct kvm_ldttss_desc *tss_desc;

	struct page *save_area;
};

static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);

struct svm_init_data {
	int cpu;
	int r;
};

static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};

#define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
#define MSRS_RANGE_SIZE 2048
#define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)

static u32 svm_msrpm_offset(u32 msr)
{
	u32 offset;
	int i;

	for (i = 0; i < NUM_MSR_MAPS; i++) {
		if (msr < msrpm_ranges[i] ||
		    msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
			continue;

		offset  = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8   */
		offset += (i * MSRS_RANGE_SIZE);       /* add range offset */

		/* Now we have the u8 offset - but need the u32 offset */
		return offset / 4;
	}

	/* MSR not in any range */
	return MSR_INVALID;
}
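/*
 * Illustrative example of the layout assumed above: each MSR takes two
 * permission bits (read, then write), i.e. four MSRs per byte, and each
 * of the three MSR ranges owns a 2048-byte block of the bitmap.  For
 * MSR_LSTAR (0xc0000082) this gives byte offset 0x82 / 4 + 2048 = 2080,
 * i.e. u32 index 520, with bits 4 and 5 selecting the read and write
 * intercepts (see set_msr_interception()).
 */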

#define MAX_INST_SIZE 15

static inline void clgi(void)
{
	asm volatile (__ex(SVM_CLGI));
}

static inline void stgi(void)
{
	asm volatile (__ex(SVM_STGI));
}

static inline void invlpga(unsigned long addr, u32 asid)
{
	asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid));
}

static int get_npt_level(void)
{
#ifdef CONFIG_X86_64
	return PT64_ROOT_LEVEL;
#else
	return PT32E_ROOT_LEVEL;
#endif
}

static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
	vcpu->arch.efer = efer;
	if (!npt_enabled && !(efer & EFER_LMA))
		efer &= ~EFER_LME;

	to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
	mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
}

static int is_external_interrupt(u32 info)
{
	info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
	return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
}

static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u32 ret = 0;

	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
		ret = KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
	return ret;
}

static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (mask == 0)
		svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
	else
		svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;

}

static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (svm->vmcb->control.next_rip != 0) {
		WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
		svm->next_rip = svm->vmcb->control.next_rip;
	}

	if (!svm->next_rip) {
		if (emulate_instruction(vcpu, EMULTYPE_SKIP) !=
				EMULATE_DONE)
			printk(KERN_DEBUG "%s: NOP\n", __func__);
		return;
	}
	if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
		printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
		       __func__, kvm_rip_read(vcpu), svm->next_rip);

	kvm_rip_write(vcpu, svm->next_rip);
	svm_set_interrupt_shadow(vcpu, 0);
}

static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
				bool has_error_code, u32 error_code,
				bool reinject)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	/*
	 * If we are within a nested VM we'd better #VMEXIT and let the guest
	 * handle the exception
	 */
	if (!reinject &&
	    nested_svm_check_exception(svm, nr, has_error_code, error_code))
		return;

	if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
		unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);

		/*
		 * For guest debugging where we have to reinject #BP if some
		 * INT3 is guest-owned:
		 * Emulate nRIP by moving RIP forward. Will fail if injection
		 * raises a fault that is not intercepted. Still better than
		 * failing in all cases.
		 */
		skip_emulated_instruction(&svm->vcpu);
		rip = kvm_rip_read(&svm->vcpu);
		svm->int3_rip = rip + svm->vmcb->save.cs.base;
		svm->int3_injected = rip - old_rip;
	}

	svm->vmcb->control.event_inj = nr
		| SVM_EVTINJ_VALID
		| (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
		| SVM_EVTINJ_TYPE_EXEPT;
	svm->vmcb->control.event_inj_err = error_code;
}

static void svm_init_erratum_383(void)
{
	u32 low, high;
	int err;
	u64 val;

	if (!static_cpu_has_bug(X86_BUG_AMD_TLB_MMATCH))
		return;

	/* Use _safe variants to not break nested virtualization */
	val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
	if (err)
		return;

	val |= (1ULL << 47);

	low  = lower_32_bits(val);
	high = upper_32_bits(val);

	native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);

	erratum_383_found = true;
}

static void svm_init_osvw(struct kvm_vcpu *vcpu)
{
	/*
	 * Guests should see errata 400 and 415 as fixed (assuming that
	 * HLT and IO instructions are intercepted).
	 */
	vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3;
	vcpu->arch.osvw.status = osvw_status & ~(6ULL);

	/*
	 * By increasing VCPU's osvw.length to 3 we are telling the guest that
	 * all osvw.status bits inside that length, including bit 0 (which is
	 * reserved for erratum 298), are valid. However, if host processor's
	 * osvw_len is 0 then osvw_status[0] carries no information. We need to
	 * be conservative here and therefore we tell the guest that erratum 298
	 * is present (because we really don't know).
	 */
	if (osvw_len == 0 && boot_cpu_data.x86 == 0x10)
		vcpu->arch.osvw.status |= 1;
}

static int has_svm(void)
{
	const char *msg;

	if (!cpu_has_svm(&msg)) {
		printk(KERN_INFO "has_svm: %s\n", msg);
		return 0;
	}

	return 1;
}

static void svm_hardware_disable(void)
{
	/* Make sure we clean up behind us */
	if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
		wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);

	cpu_svm_disable();

	amd_pmu_disable_virt();
}

static int svm_hardware_enable(void)
{

	struct svm_cpu_data *sd;
	uint64_t efer;
	struct desc_ptr gdt_descr;
	struct desc_struct *gdt;
	int me = raw_smp_processor_id();

	rdmsrl(MSR_EFER, efer);
	if (efer & EFER_SVME)
		return -EBUSY;

	if (!has_svm()) {
		pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me);
		return -EINVAL;
	}
	sd = per_cpu(svm_data, me);
	if (!sd) {
		pr_err("%s: svm_data is NULL on %d\n", __func__, me);
		return -EINVAL;
	}

	sd->asid_generation = 1;
	sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
	sd->next_asid = sd->max_asid + 1;

	native_store_gdt(&gdt_descr);
	gdt = (struct desc_struct *)gdt_descr.address;
	sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);

	wrmsrl(MSR_EFER, efer | EFER_SVME);

	wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);

	if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
		wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
		__this_cpu_write(current_tsc_ratio, TSC_RATIO_DEFAULT);
	}


	/*
	 * Get OSVW bits.
	 *
	 * Note that it is possible to have a system with mixed processor
	 * revisions and therefore different OSVW bits. If bits are not the same
	 * on different processors then choose the worst case (i.e. if erratum
	 * is present on one processor and not on another then assume that the
	 * erratum is present everywhere).
	 */
	if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
		uint64_t len, status = 0;
		int err;

		len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err);
		if (!err)
			status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS,
						      &err);

		if (err)
			osvw_status = osvw_len = 0;
		else {
			if (len < osvw_len)
				osvw_len = len;
			osvw_status |= status;
			osvw_status &= (1ULL << osvw_len) - 1;
		}
	} else
		osvw_status = osvw_len = 0;

	svm_init_erratum_383();

	amd_pmu_enable_virt();

	return 0;
}

static void svm_cpu_uninit(int cpu)
{
	struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());

	if (!sd)
		return;

	per_cpu(svm_data, raw_smp_processor_id()) = NULL;
	__free_page(sd->save_area);
	kfree(sd);
}

static int svm_cpu_init(int cpu)
{
	struct svm_cpu_data *sd;
	int r;

	sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
	if (!sd)
		return -ENOMEM;
	sd->cpu = cpu;
	sd->save_area = alloc_page(GFP_KERNEL);
	r = -ENOMEM;
	if (!sd->save_area)
		goto err_1;

	per_cpu(svm_data, cpu) = sd;

	return 0;

err_1:
	kfree(sd);
	return r;

}

static bool valid_msr_intercept(u32 index)
{
	int i;

	for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
		if (direct_access_msrs[i].index == index)
			return true;

	return false;
}

static void set_msr_interception(u32 *msrpm, unsigned msr,
				 int read, int write)
{
	u8 bit_read, bit_write;
	unsigned long tmp;
	u32 offset;

	/*
	 * If this warning triggers extend the direct_access_msrs list at the
	 * beginning of the file
	 */
	WARN_ON(!valid_msr_intercept(msr));

	offset    = svm_msrpm_offset(msr);
	bit_read  = 2 * (msr & 0x0f);
	bit_write = 2 * (msr & 0x0f) + 1;
	tmp       = msrpm[offset];

	BUG_ON(offset == MSR_INVALID);

	read  ? clear_bit(bit_read,  &tmp) : set_bit(bit_read,  &tmp);
	write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);

	msrpm[offset] = tmp;
}

static void svm_vcpu_init_msrpm(u32 *msrpm)
{
	int i;

	memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));

	for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
		if (!direct_access_msrs[i].always)
			continue;

		set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
	}
}

static void add_msr_offset(u32 offset)
{
	int i;

	for (i = 0; i < MSRPM_OFFSETS; ++i) {

		/* Offset already in list? */
		if (msrpm_offsets[i] == offset)
			return;

		/* Slot used by another offset? */
		if (msrpm_offsets[i] != MSR_INVALID)
			continue;

		/* Add offset to list */
		msrpm_offsets[i] = offset;

		return;
	}

	/*
	 * If this BUG triggers the msrpm_offsets table has an overflow. Just
	 * increase MSRPM_OFFSETS in this case.
	 */
	BUG();
}

static void init_msrpm_offsets(void)
{
	int i;

	memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));

	for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
		u32 offset;

		offset = svm_msrpm_offset(direct_access_msrs[i].index);
		BUG_ON(offset == MSR_INVALID);

		add_msr_offset(offset);
	}
}

static void svm_enable_lbrv(struct vcpu_svm *svm)
{
	u32 *msrpm = svm->msrpm;

	svm->vmcb->control.lbr_ctl = 1;
	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
	set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
}

static void svm_disable_lbrv(struct vcpu_svm *svm)
{
	u32 *msrpm = svm->msrpm;

	svm->vmcb->control.lbr_ctl = 0;
	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
	set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
}

static __init int svm_hardware_setup(void)
{
	int cpu;
	struct page *iopm_pages;
	void *iopm_va;
	int r;

	iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);

	if (!iopm_pages)
		return -ENOMEM;

	iopm_va = page_address(iopm_pages);
	memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
	iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;

	init_msrpm_offsets();

	if (boot_cpu_has(X86_FEATURE_NX))
		kvm_enable_efer_bits(EFER_NX);

	if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
		kvm_enable_efer_bits(EFER_FFXSR);

	if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
		kvm_has_tsc_control = true;
		kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
		kvm_tsc_scaling_ratio_frac_bits = 32;
	}

	if (nested) {
		printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
		kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
	}

	for_each_possible_cpu(cpu) {
		r = svm_cpu_init(cpu);
		if (r)
			goto err;
	}

	if (!boot_cpu_has(X86_FEATURE_NPT))
		npt_enabled = false;

	if (npt_enabled && !npt) {
		printk(KERN_INFO "kvm: Nested Paging disabled\n");
		npt_enabled = false;
	}

	if (npt_enabled) {
		printk(KERN_INFO "kvm: Nested Paging enabled\n");
		kvm_enable_tdp();
	} else
		kvm_disable_tdp();

	return 0;

err:
	__free_pages(iopm_pages, IOPM_ALLOC_ORDER);
	iopm_base = 0;
	return r;
}

static __exit void svm_hardware_unsetup(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		svm_cpu_uninit(cpu);

	__free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
	iopm_base = 0;
}

static void init_seg(struct vmcb_seg *seg)
{
	seg->selector = 0;
	seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK |
		      SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */
	seg->limit = 0xffff;
	seg->base = 0;
}

static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
{
	seg->selector = 0;
	seg->attrib = SVM_SELECTOR_P_MASK | type;
	seg->limit = 0xffff;
	seg->base = 0;
}

static u64 svm_read_tsc_offset(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	return svm->vmcb->control.tsc_offset;
}

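/*
 * When the vcpu is in guest mode the active VMCB carries the combined
 * L1+L2 TSC offset.  svm_write_tsc_offset() below therefore preserves
 * L2's contribution (the delta against the host save area) while
 * updating L1's offset, so a nested guest keeps seeing a consistent TSC.
 */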
static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u64 g_tsc_offset = 0;

	if (is_guest_mode(vcpu)) {
		g_tsc_offset = svm->vmcb->control.tsc_offset -
			       svm->nested.hsave->control.tsc_offset;
		svm->nested.hsave->control.tsc_offset = offset;
	} else
		trace_kvm_write_tsc_offset(vcpu->vcpu_id,
					   svm->vmcb->control.tsc_offset,
					   offset);

	svm->vmcb->control.tsc_offset = offset + g_tsc_offset;

	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
}

static void svm_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	svm->vmcb->control.tsc_offset += adjustment;
	if (is_guest_mode(vcpu))
		svm->nested.hsave->control.tsc_offset += adjustment;
	else
		trace_kvm_write_tsc_offset(vcpu->vcpu_id,
				     svm->vmcb->control.tsc_offset - adjustment,
				     svm->vmcb->control.tsc_offset);

	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
}

static void init_vmcb(struct vcpu_svm *svm)
{
	struct vmcb_control_area *control = &svm->vmcb->control;
	struct vmcb_save_area *save = &svm->vmcb->save;

	svm->vcpu.fpu_active = 1;
	svm->vcpu.arch.hflags = 0;

	set_cr_intercept(svm, INTERCEPT_CR0_READ);
	set_cr_intercept(svm, INTERCEPT_CR3_READ);
	set_cr_intercept(svm, INTERCEPT_CR4_READ);
	set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
	set_cr_intercept(svm, INTERCEPT_CR3_WRITE);
	set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
	set_cr_intercept(svm, INTERCEPT_CR8_WRITE);

	set_dr_intercepts(svm);

	set_exception_intercept(svm, PF_VECTOR);
	set_exception_intercept(svm, UD_VECTOR);
	set_exception_intercept(svm, MC_VECTOR);
	set_exception_intercept(svm, AC_VECTOR);
	set_exception_intercept(svm, DB_VECTOR);

	set_intercept(svm, INTERCEPT_INTR);
	set_intercept(svm, INTERCEPT_NMI);
	set_intercept(svm, INTERCEPT_SMI);
	set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
	set_intercept(svm, INTERCEPT_RDPMC);
	set_intercept(svm, INTERCEPT_CPUID);
	set_intercept(svm, INTERCEPT_INVD);
	set_intercept(svm, INTERCEPT_HLT);
	set_intercept(svm, INTERCEPT_INVLPG);
	set_intercept(svm, INTERCEPT_INVLPGA);
	set_intercept(svm, INTERCEPT_IOIO_PROT);
	set_intercept(svm, INTERCEPT_MSR_PROT);
	set_intercept(svm, INTERCEPT_TASK_SWITCH);
	set_intercept(svm, INTERCEPT_SHUTDOWN);
	set_intercept(svm, INTERCEPT_VMRUN);
	set_intercept(svm, INTERCEPT_VMMCALL);
	set_intercept(svm, INTERCEPT_VMLOAD);
	set_intercept(svm, INTERCEPT_VMSAVE);
	set_intercept(svm, INTERCEPT_STGI);
	set_intercept(svm, INTERCEPT_CLGI);
	set_intercept(svm, INTERCEPT_SKINIT);
	set_intercept(svm, INTERCEPT_WBINVD);
	set_intercept(svm, INTERCEPT_MONITOR);
	set_intercept(svm, INTERCEPT_MWAIT);
	set_intercept(svm, INTERCEPT_XSETBV);

	control->iopm_base_pa = iopm_base;
	control->msrpm_base_pa = __pa(svm->msrpm);
	control->int_ctl = V_INTR_MASKING_MASK;

	init_seg(&save->es);
	init_seg(&save->ss);
	init_seg(&save->ds);
	init_seg(&save->fs);
	init_seg(&save->gs);

	save->cs.selector = 0xf000;
	save->cs.base = 0xffff0000;
	/* Executable/Readable Code Segment */
	save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
		SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
	save->cs.limit = 0xffff;

	save->gdtr.limit = 0xffff;
	save->idtr.limit = 0xffff;

	init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
	init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);

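	/*
	 * The values programmed below (EFER with only SVME set, DR6
	 * 0xffff0ff0, RFLAGS 0x2, RIP 0xfff0) match the architectural
	 * reset state; together with the CS base set above the guest
	 * starts at the conventional reset vector 0xfffffff0.
	 */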
	svm_set_efer(&svm->vcpu, 0);
	save->dr6 = 0xffff0ff0;
	kvm_set_rflags(&svm->vcpu, 2);
	save->rip = 0x0000fff0;
	svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;

	/*
	 * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
	 * It also updates the guest-visible cr0 value.
	 */
	svm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
	kvm_mmu_reset_context(&svm->vcpu);

	save->cr4 = X86_CR4_PAE;
	/* rdx = ?? */

	if (npt_enabled) {
		/* Setup VMCB for Nested Paging */
		control->nested_ctl = 1;
		clr_intercept(svm, INTERCEPT_INVLPG);
		clr_exception_intercept(svm, PF_VECTOR);
		clr_cr_intercept(svm, INTERCEPT_CR3_READ);
		clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
		save->g_pat = svm->vcpu.arch.pat;
		save->cr3 = 0;
		save->cr4 = 0;
	}
	svm->asid_generation = 0;

	svm->nested.vmcb = 0;
	svm->vcpu.arch.hflags = 0;

	if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
		control->pause_filter_count = 3000;
		set_intercept(svm, INTERCEPT_PAUSE);
	}

	mark_all_dirty(svm->vmcb);

	enable_gif(svm);
}

static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u32 dummy;
	u32 eax = 1;

	if (!init_event) {
		svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
					   MSR_IA32_APICBASE_ENABLE;
		if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
			svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
	}
	init_vmcb(svm);

	kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
	kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
}

static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
{
	struct vcpu_svm *svm;
	struct page *page;
	struct page *msrpm_pages;
	struct page *hsave_page;
	struct page *nested_msrpm_pages;
	int err;

	svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!svm) {
		err = -ENOMEM;
		goto out;
	}

	err = kvm_vcpu_init(&svm->vcpu, kvm, id);
	if (err)
		goto free_svm;

	err = -ENOMEM;
	page = alloc_page(GFP_KERNEL);
	if (!page)
		goto uninit;

	msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
	if (!msrpm_pages)
		goto free_page1;

	nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
	if (!nested_msrpm_pages)
		goto free_page2;

	hsave_page = alloc_page(GFP_KERNEL);
	if (!hsave_page)
		goto free_page3;

	svm->nested.hsave = page_address(hsave_page);

	svm->msrpm = page_address(msrpm_pages);
	svm_vcpu_init_msrpm(svm->msrpm);

	svm->nested.msrpm = page_address(nested_msrpm_pages);
	svm_vcpu_init_msrpm(svm->nested.msrpm);

	svm->vmcb = page_address(page);
	clear_page(svm->vmcb);
	svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
	svm->asid_generation = 0;
	init_vmcb(svm);

	svm_init_osvw(&svm->vcpu);

	return &svm->vcpu;

free_page3:
	__free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
free_page2:
	__free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
free_page1:
	__free_page(page);
uninit:
	kvm_vcpu_uninit(&svm->vcpu);
free_svm:
	kmem_cache_free(kvm_vcpu_cache, svm);
out:
	return ERR_PTR(err);
}

static void svm_free_vcpu(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	__free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
	__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
	__free_page(virt_to_page(svm->nested.hsave));
	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, svm);
}

static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int i;

	if (unlikely(cpu != vcpu->cpu)) {
		svm->asid_generation = 0;
		mark_all_dirty(svm->vmcb);
	}

#ifdef CONFIG_X86_64
	rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
#endif
	savesegment(fs, svm->host.fs);
	savesegment(gs, svm->host.gs);
	svm->host.ldt = kvm_read_ldt();

	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
		rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);

	if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
		u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio;
		if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) {
			__this_cpu_write(current_tsc_ratio, tsc_ratio);
			wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio);
		}
	}
}

static void svm_vcpu_put(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int i;

	++vcpu->stat.host_state_reload;
	kvm_load_ldt(svm->host.ldt);
#ifdef CONFIG_X86_64
	loadsegment(fs, svm->host.fs);
	wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
	load_gs_index(svm->host.gs);
#else
#ifdef CONFIG_X86_32_LAZY_GS
	loadsegment(gs, svm->host.gs);
#endif
#endif
	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
		wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
}

static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
{
	return to_svm(vcpu)->vmcb->save.rflags;
}

static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
	/*
	 * Any change of EFLAGS.VM is accompanied by a reload of SS
	 * (caused by either a task switch or an inter-privilege IRET),
	 * so we do not need to update the CPL here.
	 */
	to_svm(vcpu)->vmcb->save.rflags = rflags;
}

static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
{
	switch (reg) {
	case VCPU_EXREG_PDPTR:
		BUG_ON(!npt_enabled);
		load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
		break;
	default:
		BUG();
	}
}

static void svm_set_vintr(struct vcpu_svm *svm)
{
	set_intercept(svm, INTERCEPT_VINTR);
}

static void svm_clear_vintr(struct vcpu_svm *svm)
{
	clr_intercept(svm, INTERCEPT_VINTR);
}

static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
{
	struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;

	switch (seg) {
	case VCPU_SREG_CS: return &save->cs;
	case VCPU_SREG_DS: return &save->ds;
	case VCPU_SREG_ES: return &save->es;
	case VCPU_SREG_FS: return &save->fs;
	case VCPU_SREG_GS: return &save->gs;
	case VCPU_SREG_SS: return &save->ss;
	case VCPU_SREG_TR: return &save->tr;
	case VCPU_SREG_LDTR: return &save->ldtr;
	}
	BUG();
	return NULL;
}

static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg)
{
	struct vmcb_seg *s = svm_seg(vcpu, seg);

	return s->base;
}

static void svm_get_segment(struct kvm_vcpu *vcpu,
			    struct kvm_segment *var, int seg)
{
	struct vmcb_seg *s = svm_seg(vcpu, seg);

	var->base = s->base;
	var->limit = s->limit;
	var->selector = s->selector;
	var->type = s->attrib & SVM_SELECTOR_TYPE_MASK;
	var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1;
	var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
	var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1;
	var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1;
	var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
	var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;

	/*
	 * AMD CPUs circa 2014 track the G bit for all segments except CS.
	 * However, the SVM spec states that the G bit is not observed by the
	 * CPU, and some VMware virtual CPUs drop the G bit for all segments.
	 * So let's synthesize a legal G bit for all segments, this helps
	 * running KVM nested. It also helps cross-vendor migration, because
	 * Intel's vmentry has a check on the 'G' bit.
	 */
	var->g = s->limit > 0xfffff;

	/*
	 * AMD's VMCB does not have an explicit unusable field, so emulate it
	 * for cross vendor migration purposes by "not present"
	 */
	var->unusable = !var->present || (var->type == 0);

	switch (seg) {
	case VCPU_SREG_TR:
		/*
		 * Work around a bug where the busy flag in the tr selector
		 * isn't exposed
		 */
		var->type |= 0x2;
		break;
	case VCPU_SREG_DS:
	case VCPU_SREG_ES:
	case VCPU_SREG_FS:
	case VCPU_SREG_GS:
		/*
		 * The accessed bit must always be set in the segment
		 * descriptor cache, although it can be cleared in the
		 * descriptor, the cached bit always remains at 1. Since
		 * Intel has a check on this, set it here to support
		 * cross-vendor migration.
		 */
		if (!var->unusable)
			var->type |= 0x1;
		break;
	case VCPU_SREG_SS:
		/*
		 * On AMD CPUs sometimes the DB bit in the segment
		 * descriptor is left as 1, although the whole segment has
		 * been made unusable. Clear it here to pass an Intel VMX
		 * entry check when cross vendor migrating.
		 */
		if (var->unusable)
			var->db = 0;
		var->dpl = to_svm(vcpu)->vmcb->save.cpl;
		break;
	}
}

static int svm_get_cpl(struct kvm_vcpu *vcpu)
{
	struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;

	return save->cpl;
}

static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	dt->size = svm->vmcb->save.idtr.limit;
	dt->address = svm->vmcb->save.idtr.base;
}

static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	svm->vmcb->save.idtr.limit = dt->size;
	svm->vmcb->save.idtr.base = dt->address;
	mark_dirty(svm->vmcb, VMCB_DT);
}

static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	dt->size = svm->vmcb->save.gdtr.limit;
	dt->address = svm->vmcb->save.gdtr.base;
}

static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	svm->vmcb->save.gdtr.limit = dt->size;
	svm->vmcb->save.gdtr.base = dt->address;
	mark_dirty(svm->vmcb, VMCB_DT);
}

static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
{
}

static void svm_decache_cr3(struct kvm_vcpu *vcpu)
{
}

static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
{
}

static void update_cr0_intercept(struct vcpu_svm *svm)
{
	ulong gcr0 = svm->vcpu.arch.cr0;
	u64 *hcr0 = &svm->vmcb->save.cr0;

	if (!svm->vcpu.fpu_active)
		*hcr0 |= SVM_CR0_SELECTIVE_MASK;
	else
		*hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
			| (gcr0 & SVM_CR0_SELECTIVE_MASK);

	mark_dirty(svm->vmcb, VMCB_CR);

	if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
		clr_cr_intercept(svm, INTERCEPT_CR0_READ);
		clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
	} else {
		set_cr_intercept(svm, INTERCEPT_CR0_READ);
		set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
	}
}

static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
	struct vcpu_svm *svm = to_svm(vcpu);

#ifdef CONFIG_X86_64
	if (vcpu->arch.efer & EFER_LME) {
		if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
			vcpu->arch.efer |= EFER_LMA;
			svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
		}

		if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
			vcpu->arch.efer &= ~EFER_LMA;
			svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
		}
	}
#endif
	vcpu->arch.cr0 = cr0;

	if (!npt_enabled)
		cr0 |= X86_CR0_PG | X86_CR0_WP;

	if (!vcpu->fpu_active)
		cr0 |= X86_CR0_TS;
	/*
	 * re-enable caching here because the QEMU bios
	 * does not do it - this results in some delay at
	 * reboot
	 */
	if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
		cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
	svm->vmcb->save.cr0 = cr0;
	mark_dirty(svm->vmcb, VMCB_CR);
	update_cr0_intercept(svm);
}

static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
	unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
	unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;

	if (cr4 & X86_CR4_VMXE)
		return 1;

	if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
		svm_flush_tlb(vcpu);

	vcpu->arch.cr4 = cr4;
	if (!npt_enabled)
		cr4 |= X86_CR4_PAE;
	cr4 |= host_cr4_mce;
	to_svm(vcpu)->vmcb->save.cr4 = cr4;
	mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
	return 0;
}

static void svm_set_segment(struct kvm_vcpu *vcpu,
			    struct kvm_segment *var, int seg)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb_seg *s = svm_seg(vcpu, seg);

	s->base = var->base;
	s->limit = var->limit;
	s->selector = var->selector;
	if (var->unusable)
		s->attrib = 0;
	else {
		s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
		s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
		s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
		s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT;
		s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
		s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
		s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
		s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
	}

	/*
	 * This is always accurate, except if SYSRET returned to a segment
	 * with SS.DPL != 3.  Intel does not have this quirk, and always
	 * forces SS.DPL to 3 on sysret, so we ignore that case; fixing it
	 * would entail passing the CPL to userspace and back.
	 */
	if (seg == VCPU_SREG_SS)
		svm->vmcb->save.cpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;

	mark_dirty(svm->vmcb, VMCB_SEG);
}

static void update_bp_intercept(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	clr_exception_intercept(svm, BP_VECTOR);

	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
			set_exception_intercept(svm, BP_VECTOR);
	} else
		vcpu->guest_debug = 0;
}

static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
{
	if (sd->next_asid > sd->max_asid) {
		++sd->asid_generation;
		sd->next_asid = 1;
		svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
	}

	svm->asid_generation = sd->asid_generation;
	svm->vmcb->control.asid = sd->next_asid++;

	mark_dirty(svm->vmcb, VMCB_ASID);
}

static u64 svm_get_dr6(struct kvm_vcpu *vcpu)
{
	return to_svm(vcpu)->vmcb->save.dr6;
}

static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	svm->vmcb->save.dr6 = value;
	mark_dirty(svm->vmcb, VMCB_DR);
}

static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	get_debugreg(vcpu->arch.db[0], 0);
	get_debugreg(vcpu->arch.db[1], 1);
	get_debugreg(vcpu->arch.db[2], 2);
	get_debugreg(vcpu->arch.db[3], 3);
	vcpu->arch.dr6 = svm_get_dr6(vcpu);
	vcpu->arch.dr7 = svm->vmcb->save.dr7;

	vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
	set_dr_intercepts(svm);
}

static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	svm->vmcb->save.dr7 = value;
	mark_dirty(svm->vmcb, VMCB_DR);
}

static int pf_interception(struct vcpu_svm *svm)
{
	u64 fault_address = svm->vmcb->control.exit_info_2;
	u32 error_code;
	int r = 1;

	switch (svm->apf_reason) {
	default:
		error_code = svm->vmcb->control.exit_info_1;

		trace_kvm_page_fault(fault_address, error_code);
		if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
			kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
		r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
			svm->vmcb->control.insn_bytes,
			svm->vmcb->control.insn_len);
		break;
	case KVM_PV_REASON_PAGE_NOT_PRESENT:
		svm->apf_reason = 0;
		local_irq_disable();
		kvm_async_pf_task_wait(fault_address);
		local_irq_enable();
		break;
	case KVM_PV_REASON_PAGE_READY:
		svm->apf_reason = 0;
		local_irq_disable();
		kvm_async_pf_task_wake(fault_address);
		local_irq_enable();
		break;
	}
	return r;
}

static int db_interception(struct vcpu_svm *svm)
{
	struct kvm_run *kvm_run = svm->vcpu.run;

	if (!(svm->vcpu.guest_debug &
	      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
		!svm->nmi_singlestep) {
		kvm_queue_exception(&svm->vcpu, DB_VECTOR);
		return 1;
	}

	if (svm->nmi_singlestep) {
		svm->nmi_singlestep = false;
		if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
			svm->vmcb->save.rflags &=
				~(X86_EFLAGS_TF | X86_EFLAGS_RF);
	}

	if (svm->vcpu.guest_debug &
	    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
		kvm_run->exit_reason = KVM_EXIT_DEBUG;
		kvm_run->debug.arch.pc =
			svm->vmcb->save.cs.base + svm->vmcb->save.rip;
		kvm_run->debug.arch.exception = DB_VECTOR;
		return 0;
	}

	return 1;
}

static int bp_interception(struct vcpu_svm *svm)
{
	struct kvm_run *kvm_run = svm->vcpu.run;

	kvm_run->exit_reason = KVM_EXIT_DEBUG;
	kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
	kvm_run->debug.arch.exception = BP_VECTOR;
	return 0;
}

static int ud_interception(struct vcpu_svm *svm)
{
	int er;

	er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
	if (er != EMULATE_DONE)
		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
	return 1;
}

static int ac_interception(struct vcpu_svm *svm)
{
	kvm_queue_exception_e(&svm->vcpu, AC_VECTOR, 0);
	return 1;
}

static void svm_fpu_activate(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	clr_exception_intercept(svm, NM_VECTOR);

	svm->vcpu.fpu_active = 1;
	update_cr0_intercept(svm);
}

static int nm_interception(struct vcpu_svm *svm)
{
	svm_fpu_activate(&svm->vcpu);
	return 1;
}

static bool is_erratum_383(void)
{
	int err, i;
	u64 value;

	if (!erratum_383_found)
		return false;

	value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err);
	if (err)
		return false;

	/* Bit 62 may or may not be set for this mce */
	value &= ~(1ULL << 62);

	if (value != 0xb600000000010015ULL)
		return false;

	/* Clear MCi_STATUS registers */
	for (i = 0; i < 6; ++i)
		native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);

	value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
	if (!err) {
		u32 low, high;

		value &= ~(1ULL << 2);
		low    = lower_32_bits(value);
		high   = upper_32_bits(value);

		native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
	}

	/* Flush tlb to evict multi-match entries */
	__flush_tlb_all();

	return true;
}

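/*
 * Erratum 383 leaves the guest state corrupted beyond recovery, so the
 * only sane reaction is to let the guest reboot via a triple fault; any
 * other #MC intercept is handed to the host machine check handler by
 * raising int $0x12 manually.
 */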
static void svm_handle_mce(struct vcpu_svm *svm)
{
	if (is_erratum_383()) {
		/*
		 * Erratum 383 triggered. Guest state is corrupt so kill the
		 * guest.
		 */
		pr_err("KVM: Guest triggered AMD Erratum 383\n");

		kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu);

		return;
	}

	/*
	 * On an #MC intercept the MCE handler is not called automatically in
	 * the host. So do it by hand here.
	 */
	asm volatile (
		"int $0x12\n");
	/* not sure if we ever come back to this point */

	return;
}

static int mc_interception(struct vcpu_svm *svm)
{
	return 1;
}

static int shutdown_interception(struct vcpu_svm *svm)
{
	struct kvm_run *kvm_run = svm->vcpu.run;

	/*
	 * VMCB is undefined after a SHUTDOWN intercept
	 * so reinitialize it.
	 */
	clear_page(svm->vmcb);
	init_vmcb(svm);

	kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
	return 0;
}

static int io_interception(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;
	u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
	int size, in, string;
	unsigned port;

	++svm->vcpu.stat.io_exits;
	string = (io_info & SVM_IOIO_STR_MASK) != 0;
	in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
	if (string || in)
		return emulate_instruction(vcpu, 0) == EMULATE_DONE;

	port = io_info >> 16;
	size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
	svm->next_rip = svm->vmcb->control.exit_info_2;
	skip_emulated_instruction(&svm->vcpu);

	return kvm_fast_pio_out(vcpu, size, port);
}

static int nmi_interception(struct vcpu_svm *svm)
{
	return 1;
}

static int intr_interception(struct vcpu_svm *svm)
{
	++svm->vcpu.stat.irq_exits;
	return 1;
}

static int nop_on_interception(struct vcpu_svm *svm)
{
	return 1;
}

static int halt_interception(struct vcpu_svm *svm)
{
	svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
	return kvm_emulate_halt(&svm->vcpu);
}

static int vmmcall_interception(struct vcpu_svm *svm)
{
	svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
	kvm_emulate_hypercall(&svm->vcpu);
	return 1;
}

static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	return svm->nested.nested_cr3;
}

static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u64 cr3 = svm->nested.nested_cr3;
	u64 pdpte;
	int ret;

	ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte,
				       offset_in_page(cr3) + index * 8, 8);
	if (ret)
		return 0;
	return pdpte;
}

static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
				   unsigned long root)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	svm->vmcb->control.nested_cr3 = root;
	mark_dirty(svm->vmcb, VMCB_NPT);
	svm_flush_tlb(vcpu);
}

static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
				       struct x86_exception *fault)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) {
		/*
		 * TODO: track the cause of the nested page fault, and
		 * correctly fill in the high bits of exit_info_1.
		 */
		svm->vmcb->control.exit_code = SVM_EXIT_NPF;
		svm->vmcb->control.exit_code_hi = 0;
		svm->vmcb->control.exit_info_1 = (1ULL << 32);
		svm->vmcb->control.exit_info_2 = fault->address;
	}

	svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
	svm->vmcb->control.exit_info_1 |= fault->error_code;

	/*
	 * The present bit is always zero for page structure faults on real
	 * hardware.
	 */
	if (svm->vmcb->control.exit_info_1 & (2ULL << 32))
		svm->vmcb->control.exit_info_1 &= ~1;

	nested_svm_vmexit(svm);
}

static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
{
	WARN_ON(mmu_is_nested(vcpu));
	kvm_init_shadow_mmu(vcpu);
	vcpu->arch.mmu.set_cr3           = nested_svm_set_tdp_cr3;
	vcpu->arch.mmu.get_cr3           = nested_svm_get_tdp_cr3;
	vcpu->arch.mmu.get_pdptr         = nested_svm_get_tdp_pdptr;
	vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
	vcpu->arch.mmu.shadow_root_level = get_npt_level();
	reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu);
	vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
}

static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
{
	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
}

static int nested_svm_check_permissions(struct vcpu_svm *svm)
{
	if (!(svm->vcpu.arch.efer & EFER_SVME)
	    || !is_paging(&svm->vcpu)) {
		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
		return 1;
	}

	if (svm->vmcb->save.cpl) {
		kvm_inject_gp(&svm->vcpu, 0);
		return 1;
	}

	return 0;
}

static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
				      bool has_error_code, u32 error_code)
{
	int vmexit;

	if (!is_guest_mode(&svm->vcpu))
		return 0;

	svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
	svm->vmcb->control.exit_code_hi = 0;
	svm->vmcb->control.exit_info_1 = error_code;
	svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;

	vmexit = nested_svm_intercept(svm);
	if (vmexit == NESTED_EXIT_DONE)
		svm->nested.exit_required = true;

	return vmexit;
}

/* This function returns true if it is safe to enable the irq window */
static inline bool nested_svm_intr(struct vcpu_svm *svm)
{
	if (!is_guest_mode(&svm->vcpu))
		return true;

	if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
		return true;

	if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
		return false;

	/*
	 * if vmexit was already requested (by intercepted exception
	 * for instance) do not overwrite it with "external interrupt"
	 * vmexit.
	 */
	if (svm->nested.exit_required)
		return false;

	svm->vmcb->control.exit_code   = SVM_EXIT_INTR;
	svm->vmcb->control.exit_info_1 = 0;
	svm->vmcb->control.exit_info_2 = 0;

	if (svm->nested.intercept & 1ULL) {
		/*
		 * The #vmexit can't be emulated here directly because this
		 * code path runs with irqs and preemption disabled. A
		 * #vmexit emulation might sleep. Only signal request for
		 * the #vmexit here.
		 */
		svm->nested.exit_required = true;
		trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
		return false;
	}

	return true;
}

/* This function returns true if it is safe to enable the nmi window */
static inline bool nested_svm_nmi(struct vcpu_svm *svm)
{
	if (!is_guest_mode(&svm->vcpu))
		return true;

	if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
		return true;

	svm->vmcb->control.exit_code = SVM_EXIT_NMI;
	svm->nested.exit_required = true;

	return false;
}

static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
{
	struct page *page;

	might_sleep();

	page = kvm_vcpu_gfn_to_page(&svm->vcpu, gpa >> PAGE_SHIFT);
	if (is_error_page(page))
		goto error;

	*_page = page;

	return kmap(page);

error:
	kvm_inject_gp(&svm->vcpu, 0);

	return NULL;
}

static void nested_svm_unmap(struct page *page)
{
	kunmap(page);
	kvm_release_page_dirty(page);
}

static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
{
	unsigned port, size, iopm_len;
	u16 val, mask;
	u8 start_bit;
	u64 gpa;

	if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
		return NESTED_EXIT_HOST;

	port = svm->vmcb->control.exit_info_1 >> 16;
	size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
		SVM_IOIO_SIZE_SHIFT;
	gpa  = svm->nested.vmcb_iopm + (port / 8);
	start_bit = port % 8;
	iopm_len = (start_bit + size > 8) ? 2 : 1;
	mask = (0xf >> (4 - size)) << start_bit;
	val = 0;

	if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
		return NESTED_EXIT_DONE;

	return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}
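/*
 * Worked example for the IOPM check above: a one-byte access to port
 * 0x3f8 tests bit 0 of the byte at offset 0x3f8 / 8 = 127 of L1's IOPM
 * (start_bit = 0x3f8 % 8 = 0, mask = 0x1); the exit is reflected to L1
 * only if that permission bit is set.
 */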
NESTED_EXIT_DONE : NESTED_EXIT_HOST; 2102} 2103 2104static int nested_svm_exit_special(struct vcpu_svm *svm) 2105{ 2106 u32 exit_code = svm->vmcb->control.exit_code; 2107 2108 switch (exit_code) { 2109 case SVM_EXIT_INTR: 2110 case SVM_EXIT_NMI: 2111 case SVM_EXIT_EXCP_BASE + MC_VECTOR: 2112 return NESTED_EXIT_HOST; 2113 case SVM_EXIT_NPF: 2114 /* For now we are always handling NPFs when using them */ 2115 if (npt_enabled) 2116 return NESTED_EXIT_HOST; 2117 break; 2118 case SVM_EXIT_EXCP_BASE + PF_VECTOR: 2119 /* When we're shadowing, trap PFs, but not async PF */ 2120 if (!npt_enabled && svm->apf_reason == 0) 2121 return NESTED_EXIT_HOST; 2122 break; 2123 case SVM_EXIT_EXCP_BASE + NM_VECTOR: 2124 nm_interception(svm); 2125 break; 2126 default: 2127 break; 2128 } 2129 2130 return NESTED_EXIT_CONTINUE; 2131} 2132 2133/* 2134 * If this function returns true, this #vmexit was already handled 2135 */ 2136static int nested_svm_intercept(struct vcpu_svm *svm) 2137{ 2138 u32 exit_code = svm->vmcb->control.exit_code; 2139 int vmexit = NESTED_EXIT_HOST; 2140 2141 switch (exit_code) { 2142 case SVM_EXIT_MSR: 2143 vmexit = nested_svm_exit_handled_msr(svm); 2144 break; 2145 case SVM_EXIT_IOIO: 2146 vmexit = nested_svm_intercept_ioio(svm); 2147 break; 2148 case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: { 2149 u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0); 2150 if (svm->nested.intercept_cr & bit) 2151 vmexit = NESTED_EXIT_DONE; 2152 break; 2153 } 2154 case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: { 2155 u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0); 2156 if (svm->nested.intercept_dr & bit) 2157 vmexit = NESTED_EXIT_DONE; 2158 break; 2159 } 2160 case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: { 2161 u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); 2162 if (svm->nested.intercept_exceptions & excp_bits) 2163 vmexit = NESTED_EXIT_DONE; 2164 /* async page fault always cause vmexit */ 2165 else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) && 2166 svm->apf_reason != 0) 2167 vmexit = NESTED_EXIT_DONE; 2168 break; 2169 } 2170 case SVM_EXIT_ERR: { 2171 vmexit = NESTED_EXIT_DONE; 2172 break; 2173 } 2174 default: { 2175 u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); 2176 if (svm->nested.intercept & exit_bits) 2177 vmexit = NESTED_EXIT_DONE; 2178 } 2179 } 2180 2181 return vmexit; 2182} 2183 2184static int nested_svm_exit_handled(struct vcpu_svm *svm) 2185{ 2186 int vmexit; 2187 2188 vmexit = nested_svm_intercept(svm); 2189 2190 if (vmexit == NESTED_EXIT_DONE) 2191 nested_svm_vmexit(svm); 2192 2193 return vmexit; 2194} 2195 2196static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb) 2197{ 2198 struct vmcb_control_area *dst = &dst_vmcb->control; 2199 struct vmcb_control_area *from = &from_vmcb->control; 2200 2201 dst->intercept_cr = from->intercept_cr; 2202 dst->intercept_dr = from->intercept_dr; 2203 dst->intercept_exceptions = from->intercept_exceptions; 2204 dst->intercept = from->intercept; 2205 dst->iopm_base_pa = from->iopm_base_pa; 2206 dst->msrpm_base_pa = from->msrpm_base_pa; 2207 dst->tsc_offset = from->tsc_offset; 2208 dst->asid = from->asid; 2209 dst->tlb_ctl = from->tlb_ctl; 2210 dst->int_ctl = from->int_ctl; 2211 dst->int_vector = from->int_vector; 2212 dst->int_state = from->int_state; 2213 dst->exit_code = from->exit_code; 2214 dst->exit_code_hi = from->exit_code_hi; 2215 dst->exit_info_1 = from->exit_info_1; 2216 dst->exit_info_2 = from->exit_info_2; 2217 dst->exit_int_info = from->exit_int_info; 2218 dst->exit_int_info_err = 
from->exit_int_info_err; 2219 dst->nested_ctl = from->nested_ctl; 2220 dst->event_inj = from->event_inj; 2221 dst->event_inj_err = from->event_inj_err; 2222 dst->nested_cr3 = from->nested_cr3; 2223 dst->lbr_ctl = from->lbr_ctl; 2224} 2225 2226static int nested_svm_vmexit(struct vcpu_svm *svm) 2227{ 2228 struct vmcb *nested_vmcb; 2229 struct vmcb *hsave = svm->nested.hsave; 2230 struct vmcb *vmcb = svm->vmcb; 2231 struct page *page; 2232 2233 trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, 2234 vmcb->control.exit_info_1, 2235 vmcb->control.exit_info_2, 2236 vmcb->control.exit_int_info, 2237 vmcb->control.exit_int_info_err, 2238 KVM_ISA_SVM); 2239 2240 nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page); 2241 if (!nested_vmcb) 2242 return 1; 2243 2244 /* Exit Guest-Mode */ 2245 leave_guest_mode(&svm->vcpu); 2246 svm->nested.vmcb = 0; 2247 2248 /* Give the current vmcb to the guest */ 2249 disable_gif(svm); 2250 2251 nested_vmcb->save.es = vmcb->save.es; 2252 nested_vmcb->save.cs = vmcb->save.cs; 2253 nested_vmcb->save.ss = vmcb->save.ss; 2254 nested_vmcb->save.ds = vmcb->save.ds; 2255 nested_vmcb->save.gdtr = vmcb->save.gdtr; 2256 nested_vmcb->save.idtr = vmcb->save.idtr; 2257 nested_vmcb->save.efer = svm->vcpu.arch.efer; 2258 nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu); 2259 nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu); 2260 nested_vmcb->save.cr2 = vmcb->save.cr2; 2261 nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; 2262 nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu); 2263 nested_vmcb->save.rip = vmcb->save.rip; 2264 nested_vmcb->save.rsp = vmcb->save.rsp; 2265 nested_vmcb->save.rax = vmcb->save.rax; 2266 nested_vmcb->save.dr7 = vmcb->save.dr7; 2267 nested_vmcb->save.dr6 = vmcb->save.dr6; 2268 nested_vmcb->save.cpl = vmcb->save.cpl; 2269 2270 nested_vmcb->control.int_ctl = vmcb->control.int_ctl; 2271 nested_vmcb->control.int_vector = vmcb->control.int_vector; 2272 nested_vmcb->control.int_state = vmcb->control.int_state; 2273 nested_vmcb->control.exit_code = vmcb->control.exit_code; 2274 nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi; 2275 nested_vmcb->control.exit_info_1 = vmcb->control.exit_info_1; 2276 nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2; 2277 nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info; 2278 nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err; 2279 2280 if (svm->nrips_enabled) 2281 nested_vmcb->control.next_rip = vmcb->control.next_rip; 2282 2283 /* 2284 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have 2285 * to make sure that we do not lose injected events. So check event_inj 2286 * here and copy it to exit_int_info if it is valid. 2287 * Exit_int_info and event_inj can't be both valid because the case 2288 * below only happens on a VMRUN instruction intercept which has 2289 * no valid exit_int_info set. 
2290 */ 2291 if (vmcb->control.event_inj & SVM_EVTINJ_VALID) { 2292 struct vmcb_control_area *nc = &nested_vmcb->control; 2293 2294 nc->exit_int_info = vmcb->control.event_inj; 2295 nc->exit_int_info_err = vmcb->control.event_inj_err; 2296 } 2297 2298 nested_vmcb->control.tlb_ctl = 0; 2299 nested_vmcb->control.event_inj = 0; 2300 nested_vmcb->control.event_inj_err = 0; 2301 2302 /* We always set V_INTR_MASKING and remember the old value in hflags */ 2303 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) 2304 nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK; 2305 2306 /* Restore the original control entries */ 2307 copy_vmcb_control_area(vmcb, hsave); 2308 2309 kvm_clear_exception_queue(&svm->vcpu); 2310 kvm_clear_interrupt_queue(&svm->vcpu); 2311 2312 svm->nested.nested_cr3 = 0; 2313 2314 /* Restore selected save entries */ 2315 svm->vmcb->save.es = hsave->save.es; 2316 svm->vmcb->save.cs = hsave->save.cs; 2317 svm->vmcb->save.ss = hsave->save.ss; 2318 svm->vmcb->save.ds = hsave->save.ds; 2319 svm->vmcb->save.gdtr = hsave->save.gdtr; 2320 svm->vmcb->save.idtr = hsave->save.idtr; 2321 kvm_set_rflags(&svm->vcpu, hsave->save.rflags); 2322 svm_set_efer(&svm->vcpu, hsave->save.efer); 2323 svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); 2324 svm_set_cr4(&svm->vcpu, hsave->save.cr4); 2325 if (npt_enabled) { 2326 svm->vmcb->save.cr3 = hsave->save.cr3; 2327 svm->vcpu.arch.cr3 = hsave->save.cr3; 2328 } else { 2329 (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3); 2330 } 2331 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax); 2332 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp); 2333 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip); 2334 svm->vmcb->save.dr7 = 0; 2335 svm->vmcb->save.cpl = 0; 2336 svm->vmcb->control.exit_int_info = 0; 2337 2338 mark_all_dirty(svm->vmcb); 2339 2340 nested_svm_unmap(page); 2341 2342 nested_svm_uninit_mmu_context(&svm->vcpu); 2343 kvm_mmu_reset_context(&svm->vcpu); 2344 kvm_mmu_load(&svm->vcpu); 2345 2346 return 0; 2347} 2348 2349static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) 2350{ 2351 /* 2352 * This function merges the msr permission bitmaps of kvm and the 2353 * nested vmcb. 
It is optimized in that it only merges the parts where 2354 * the kvm msr permission bitmap may contain zero bits 2355 */ 2356 int i; 2357 2358 if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) 2359 return true; 2360 2361 for (i = 0; i < MSRPM_OFFSETS; i++) { 2362 u32 value, p; 2363 u64 offset; 2364 2365 if (msrpm_offsets[i] == 0xffffffff) 2366 break; 2367 2368 p = msrpm_offsets[i]; 2369 offset = svm->nested.vmcb_msrpm + (p * 4); 2370 2371 if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4)) 2372 return false; 2373 2374 svm->nested.msrpm[p] = svm->msrpm[p] | value; 2375 } 2376 2377 svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm); 2378 2379 return true; 2380} 2381 2382static bool nested_vmcb_checks(struct vmcb *vmcb) 2383{ 2384 if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0) 2385 return false; 2386 2387 if (vmcb->control.asid == 0) 2388 return false; 2389 2390 if (vmcb->control.nested_ctl && !npt_enabled) 2391 return false; 2392 2393 return true; 2394} 2395 2396static bool nested_svm_vmrun(struct vcpu_svm *svm) 2397{ 2398 struct vmcb *nested_vmcb; 2399 struct vmcb *hsave = svm->nested.hsave; 2400 struct vmcb *vmcb = svm->vmcb; 2401 struct page *page; 2402 u64 vmcb_gpa; 2403 2404 vmcb_gpa = svm->vmcb->save.rax; 2405 2406 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); 2407 if (!nested_vmcb) 2408 return false; 2409 2410 if (!nested_vmcb_checks(nested_vmcb)) { 2411 nested_vmcb->control.exit_code = SVM_EXIT_ERR; 2412 nested_vmcb->control.exit_code_hi = 0; 2413 nested_vmcb->control.exit_info_1 = 0; 2414 nested_vmcb->control.exit_info_2 = 0; 2415 2416 nested_svm_unmap(page); 2417 2418 return false; 2419 } 2420 2421 trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa, 2422 nested_vmcb->save.rip, 2423 nested_vmcb->control.int_ctl, 2424 nested_vmcb->control.event_inj, 2425 nested_vmcb->control.nested_ctl); 2426 2427 trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff, 2428 nested_vmcb->control.intercept_cr >> 16, 2429 nested_vmcb->control.intercept_exceptions, 2430 nested_vmcb->control.intercept); 2431 2432 /* Clear internal status */ 2433 kvm_clear_exception_queue(&svm->vcpu); 2434 kvm_clear_interrupt_queue(&svm->vcpu); 2435 2436 /* 2437 * Save the old vmcb, so we don't need to pick what we save, but can 2438 * restore everything when a VMEXIT occurs 2439 */ 2440 hsave->save.es = vmcb->save.es; 2441 hsave->save.cs = vmcb->save.cs; 2442 hsave->save.ss = vmcb->save.ss; 2443 hsave->save.ds = vmcb->save.ds; 2444 hsave->save.gdtr = vmcb->save.gdtr; 2445 hsave->save.idtr = vmcb->save.idtr; 2446 hsave->save.efer = svm->vcpu.arch.efer; 2447 hsave->save.cr0 = kvm_read_cr0(&svm->vcpu); 2448 hsave->save.cr4 = svm->vcpu.arch.cr4; 2449 hsave->save.rflags = kvm_get_rflags(&svm->vcpu); 2450 hsave->save.rip = kvm_rip_read(&svm->vcpu); 2451 hsave->save.rsp = vmcb->save.rsp; 2452 hsave->save.rax = vmcb->save.rax; 2453 if (npt_enabled) 2454 hsave->save.cr3 = vmcb->save.cr3; 2455 else 2456 hsave->save.cr3 = kvm_read_cr3(&svm->vcpu); 2457 2458 copy_vmcb_control_area(hsave, vmcb); 2459 2460 if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF) 2461 svm->vcpu.arch.hflags |= HF_HIF_MASK; 2462 else 2463 svm->vcpu.arch.hflags &= ~HF_HIF_MASK; 2464 2465 if (nested_vmcb->control.nested_ctl) { 2466 kvm_mmu_unload(&svm->vcpu); 2467 svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3; 2468 nested_svm_init_mmu_context(&svm->vcpu); 2469 } 2470 2471 /* Load the nested guest state */ 2472 svm->vmcb->save.es = nested_vmcb->save.es; 2473 svm->vmcb->save.cs = 
				nested_vmcb->save.cs;
	svm->vmcb->save.ss = nested_vmcb->save.ss;
	svm->vmcb->save.ds = nested_vmcb->save.ds;
	svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
	svm->vmcb->save.idtr = nested_vmcb->save.idtr;
	kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
	svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
	svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
	svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
	if (npt_enabled) {
		svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
		svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
	} else
		(void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);

	/* Guest paging mode is active - reset mmu */
	kvm_mmu_reset_context(&svm->vcpu);

	svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
	kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
	kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
	kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);

	/* In case we don't even reach vcpu_run, the fields are not updated */
	svm->vmcb->save.rax = nested_vmcb->save.rax;
	svm->vmcb->save.rsp = nested_vmcb->save.rsp;
	svm->vmcb->save.rip = nested_vmcb->save.rip;
	svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
	svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
	svm->vmcb->save.cpl = nested_vmcb->save.cpl;

	svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
	svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL;

	/* cache intercepts */
	svm->nested.intercept_cr = nested_vmcb->control.intercept_cr;
	svm->nested.intercept_dr = nested_vmcb->control.intercept_dr;
	svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
	svm->nested.intercept = nested_vmcb->control.intercept;

	svm_flush_tlb(&svm->vcpu);
	svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
	if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
		svm->vcpu.arch.hflags |= HF_VINTR_MASK;
	else
		svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;

	if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
		/* We only want the cr8 intercept bits of the guest */
		clr_cr_intercept(svm, INTERCEPT_CR8_READ);
		clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
	}

	/* We don't want to see VMMCALLs from a nested guest */
	clr_intercept(svm, INTERCEPT_VMMCALL);

	svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl;
	svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
	svm->vmcb->control.int_state = nested_vmcb->control.int_state;
	svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
	svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
	svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;

	nested_svm_unmap(page);

	/* Enter Guest-Mode */
	enter_guest_mode(&svm->vcpu);

	/*
	 * Merge guest and host intercepts - must be called with vcpu in
	 * guest-mode to take effect here
	 */
	recalc_intercepts(svm);

	svm->nested.vmcb = vmcb_gpa;

	enable_gif(svm);

	mark_all_dirty(svm->vmcb);

	return true;
}

static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
{
	to_vmcb->save.fs = from_vmcb->save.fs;
	to_vmcb->save.gs = from_vmcb->save.gs;
	to_vmcb->save.tr = from_vmcb->save.tr;
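	/*
	 * Note: the registers handled here and below (fs, gs, tr, ldtr,
	 * KernelGsBase and the SYSCALL/SYSENTER MSR state) mirror what the
	 * hardware VMLOAD/VMSAVE instructions transfer, so the same helper
	 * serves both directions with swapped arguments.
	 */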
to_vmcb->save.ldtr = from_vmcb->save.ldtr; 2562 to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base; 2563 to_vmcb->save.star = from_vmcb->save.star; 2564 to_vmcb->save.lstar = from_vmcb->save.lstar; 2565 to_vmcb->save.cstar = from_vmcb->save.cstar; 2566 to_vmcb->save.sfmask = from_vmcb->save.sfmask; 2567 to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs; 2568 to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp; 2569 to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip; 2570} 2571 2572static int vmload_interception(struct vcpu_svm *svm) 2573{ 2574 struct vmcb *nested_vmcb; 2575 struct page *page; 2576 2577 if (nested_svm_check_permissions(svm)) 2578 return 1; 2579 2580 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); 2581 if (!nested_vmcb) 2582 return 1; 2583 2584 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 2585 skip_emulated_instruction(&svm->vcpu); 2586 2587 nested_svm_vmloadsave(nested_vmcb, svm->vmcb); 2588 nested_svm_unmap(page); 2589 2590 return 1; 2591} 2592 2593static int vmsave_interception(struct vcpu_svm *svm) 2594{ 2595 struct vmcb *nested_vmcb; 2596 struct page *page; 2597 2598 if (nested_svm_check_permissions(svm)) 2599 return 1; 2600 2601 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); 2602 if (!nested_vmcb) 2603 return 1; 2604 2605 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 2606 skip_emulated_instruction(&svm->vcpu); 2607 2608 nested_svm_vmloadsave(svm->vmcb, nested_vmcb); 2609 nested_svm_unmap(page); 2610 2611 return 1; 2612} 2613 2614static int vmrun_interception(struct vcpu_svm *svm) 2615{ 2616 if (nested_svm_check_permissions(svm)) 2617 return 1; 2618 2619 /* Save rip after vmrun instruction */ 2620 kvm_rip_write(&svm->vcpu, kvm_rip_read(&svm->vcpu) + 3); 2621 2622 if (!nested_svm_vmrun(svm)) 2623 return 1; 2624 2625 if (!nested_svm_vmrun_msrpm(svm)) 2626 goto failed; 2627 2628 return 1; 2629 2630failed: 2631 2632 svm->vmcb->control.exit_code = SVM_EXIT_ERR; 2633 svm->vmcb->control.exit_code_hi = 0; 2634 svm->vmcb->control.exit_info_1 = 0; 2635 svm->vmcb->control.exit_info_2 = 0; 2636 2637 nested_svm_vmexit(svm); 2638 2639 return 1; 2640} 2641 2642static int stgi_interception(struct vcpu_svm *svm) 2643{ 2644 if (nested_svm_check_permissions(svm)) 2645 return 1; 2646 2647 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 2648 skip_emulated_instruction(&svm->vcpu); 2649 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); 2650 2651 enable_gif(svm); 2652 2653 return 1; 2654} 2655 2656static int clgi_interception(struct vcpu_svm *svm) 2657{ 2658 if (nested_svm_check_permissions(svm)) 2659 return 1; 2660 2661 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 2662 skip_emulated_instruction(&svm->vcpu); 2663 2664 disable_gif(svm); 2665 2666 /* After a CLGI no interrupts should come */ 2667 svm_clear_vintr(svm); 2668 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; 2669 2670 mark_dirty(svm->vmcb, VMCB_INTR); 2671 2672 return 1; 2673} 2674 2675static int invlpga_interception(struct vcpu_svm *svm) 2676{ 2677 struct kvm_vcpu *vcpu = &svm->vcpu; 2678 2679 trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX), 2680 kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); 2681 2682 /* Let's treat INVLPGA the same as INVLPG (can be optimized!) 
*/ 2683 kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); 2684 2685 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 2686 skip_emulated_instruction(&svm->vcpu); 2687 return 1; 2688} 2689 2690static int skinit_interception(struct vcpu_svm *svm) 2691{ 2692 trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); 2693 2694 kvm_queue_exception(&svm->vcpu, UD_VECTOR); 2695 return 1; 2696} 2697 2698static int wbinvd_interception(struct vcpu_svm *svm) 2699{ 2700 kvm_emulate_wbinvd(&svm->vcpu); 2701 return 1; 2702} 2703 2704static int xsetbv_interception(struct vcpu_svm *svm) 2705{ 2706 u64 new_bv = kvm_read_edx_eax(&svm->vcpu); 2707 u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); 2708 2709 if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) { 2710 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 2711 skip_emulated_instruction(&svm->vcpu); 2712 } 2713 2714 return 1; 2715} 2716 2717static int task_switch_interception(struct vcpu_svm *svm) 2718{ 2719 u16 tss_selector; 2720 int reason; 2721 int int_type = svm->vmcb->control.exit_int_info & 2722 SVM_EXITINTINFO_TYPE_MASK; 2723 int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK; 2724 uint32_t type = 2725 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK; 2726 uint32_t idt_v = 2727 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID; 2728 bool has_error_code = false; 2729 u32 error_code = 0; 2730 2731 tss_selector = (u16)svm->vmcb->control.exit_info_1; 2732 2733 if (svm->vmcb->control.exit_info_2 & 2734 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET)) 2735 reason = TASK_SWITCH_IRET; 2736 else if (svm->vmcb->control.exit_info_2 & 2737 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP)) 2738 reason = TASK_SWITCH_JMP; 2739 else if (idt_v) 2740 reason = TASK_SWITCH_GATE; 2741 else 2742 reason = TASK_SWITCH_CALL; 2743 2744 if (reason == TASK_SWITCH_GATE) { 2745 switch (type) { 2746 case SVM_EXITINTINFO_TYPE_NMI: 2747 svm->vcpu.arch.nmi_injected = false; 2748 break; 2749 case SVM_EXITINTINFO_TYPE_EXEPT: 2750 if (svm->vmcb->control.exit_info_2 & 2751 (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) { 2752 has_error_code = true; 2753 error_code = 2754 (u32)svm->vmcb->control.exit_info_2; 2755 } 2756 kvm_clear_exception_queue(&svm->vcpu); 2757 break; 2758 case SVM_EXITINTINFO_TYPE_INTR: 2759 kvm_clear_interrupt_queue(&svm->vcpu); 2760 break; 2761 default: 2762 break; 2763 } 2764 } 2765 2766 if (reason != TASK_SWITCH_GATE || 2767 int_type == SVM_EXITINTINFO_TYPE_SOFT || 2768 (int_type == SVM_EXITINTINFO_TYPE_EXEPT && 2769 (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) 2770 skip_emulated_instruction(&svm->vcpu); 2771 2772 if (int_type != SVM_EXITINTINFO_TYPE_SOFT) 2773 int_vec = -1; 2774 2775 if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason, 2776 has_error_code, error_code) == EMULATE_FAIL) { 2777 svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 2778 svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; 2779 svm->vcpu.run->internal.ndata = 0; 2780 return 0; 2781 } 2782 return 1; 2783} 2784 2785static int cpuid_interception(struct vcpu_svm *svm) 2786{ 2787 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; 2788 kvm_emulate_cpuid(&svm->vcpu); 2789 return 1; 2790} 2791 2792static int iret_interception(struct vcpu_svm *svm) 2793{ 2794 ++svm->vcpu.stat.nmi_window_exits; 2795 clr_intercept(svm, INTERCEPT_IRET); 2796 svm->vcpu.arch.hflags |= HF_IRET_MASK; 2797 svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu); 2798 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); 2799 return 1; 2800} 2801 
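/*
 * INVLPG is handled directly only when the CPU provides decode assists
 * (which supply the linear address in exit_info_1); otherwise the
 * instruction is run through the emulator.
 */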
2802static int invlpg_interception(struct vcpu_svm *svm) 2803{ 2804 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) 2805 return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; 2806 2807 kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1); 2808 skip_emulated_instruction(&svm->vcpu); 2809 return 1; 2810} 2811 2812static int emulate_on_interception(struct vcpu_svm *svm) 2813{ 2814 return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; 2815} 2816 2817static int rdpmc_interception(struct vcpu_svm *svm) 2818{ 2819 int err; 2820 2821 if (!static_cpu_has(X86_FEATURE_NRIPS)) 2822 return emulate_on_interception(svm); 2823 2824 err = kvm_rdpmc(&svm->vcpu); 2825 kvm_complete_insn_gp(&svm->vcpu, err); 2826 2827 return 1; 2828} 2829 2830static bool check_selective_cr0_intercepted(struct vcpu_svm *svm, 2831 unsigned long val) 2832{ 2833 unsigned long cr0 = svm->vcpu.arch.cr0; 2834 bool ret = false; 2835 u64 intercept; 2836 2837 intercept = svm->nested.intercept; 2838 2839 if (!is_guest_mode(&svm->vcpu) || 2840 (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))) 2841 return false; 2842 2843 cr0 &= ~SVM_CR0_SELECTIVE_MASK; 2844 val &= ~SVM_CR0_SELECTIVE_MASK; 2845 2846 if (cr0 ^ val) { 2847 svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; 2848 ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE); 2849 } 2850 2851 return ret; 2852} 2853 2854#define CR_VALID (1ULL << 63) 2855 2856static int cr_interception(struct vcpu_svm *svm) 2857{ 2858 int reg, cr; 2859 unsigned long val; 2860 int err; 2861 2862 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) 2863 return emulate_on_interception(svm); 2864 2865 if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0)) 2866 return emulate_on_interception(svm); 2867 2868 reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK; 2869 if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE) 2870 cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0; 2871 else 2872 cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0; 2873 2874 err = 0; 2875 if (cr >= 16) { /* mov to cr */ 2876 cr -= 16; 2877 val = kvm_register_read(&svm->vcpu, reg); 2878 switch (cr) { 2879 case 0: 2880 if (!check_selective_cr0_intercepted(svm, val)) 2881 err = kvm_set_cr0(&svm->vcpu, val); 2882 else 2883 return 1; 2884 2885 break; 2886 case 3: 2887 err = kvm_set_cr3(&svm->vcpu, val); 2888 break; 2889 case 4: 2890 err = kvm_set_cr4(&svm->vcpu, val); 2891 break; 2892 case 8: 2893 err = kvm_set_cr8(&svm->vcpu, val); 2894 break; 2895 default: 2896 WARN(1, "unhandled write to CR%d", cr); 2897 kvm_queue_exception(&svm->vcpu, UD_VECTOR); 2898 return 1; 2899 } 2900 } else { /* mov from cr */ 2901 switch (cr) { 2902 case 0: 2903 val = kvm_read_cr0(&svm->vcpu); 2904 break; 2905 case 2: 2906 val = svm->vcpu.arch.cr2; 2907 break; 2908 case 3: 2909 val = kvm_read_cr3(&svm->vcpu); 2910 break; 2911 case 4: 2912 val = kvm_read_cr4(&svm->vcpu); 2913 break; 2914 case 8: 2915 val = kvm_get_cr8(&svm->vcpu); 2916 break; 2917 default: 2918 WARN(1, "unhandled read from CR%d", cr); 2919 kvm_queue_exception(&svm->vcpu, UD_VECTOR); 2920 return 1; 2921 } 2922 kvm_register_write(&svm->vcpu, reg, val); 2923 } 2924 kvm_complete_insn_gp(&svm->vcpu, err); 2925 2926 return 1; 2927} 2928 2929static int dr_interception(struct vcpu_svm *svm) 2930{ 2931 int reg, dr; 2932 unsigned long val; 2933 2934 if (svm->vcpu.guest_debug == 0) { 2935 /* 2936 * No more DR vmexits; force a reload of the debug registers 2937 * and reenter on this instruction. The next vmexit will 2938 * retrieve the full state of the debug registers. 
2939 */ 2940 clr_dr_intercepts(svm); 2941 svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT; 2942 return 1; 2943 } 2944 2945 if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS)) 2946 return emulate_on_interception(svm); 2947 2948 reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK; 2949 dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0; 2950 2951 if (dr >= 16) { /* mov to DRn */ 2952 if (!kvm_require_dr(&svm->vcpu, dr - 16)) 2953 return 1; 2954 val = kvm_register_read(&svm->vcpu, reg); 2955 kvm_set_dr(&svm->vcpu, dr - 16, val); 2956 } else { 2957 if (!kvm_require_dr(&svm->vcpu, dr)) 2958 return 1; 2959 kvm_get_dr(&svm->vcpu, dr, &val); 2960 kvm_register_write(&svm->vcpu, reg, val); 2961 } 2962 2963 skip_emulated_instruction(&svm->vcpu); 2964 2965 return 1; 2966} 2967 2968static int cr8_write_interception(struct vcpu_svm *svm) 2969{ 2970 struct kvm_run *kvm_run = svm->vcpu.run; 2971 int r; 2972 2973 u8 cr8_prev = kvm_get_cr8(&svm->vcpu); 2974 /* instruction emulation calls kvm_set_cr8() */ 2975 r = cr_interception(svm); 2976 if (lapic_in_kernel(&svm->vcpu)) 2977 return r; 2978 if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) 2979 return r; 2980 kvm_run->exit_reason = KVM_EXIT_SET_TPR; 2981 return 0; 2982} 2983 2984static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) 2985{ 2986 struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); 2987 return vmcb->control.tsc_offset + host_tsc; 2988} 2989 2990static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) 2991{ 2992 struct vcpu_svm *svm = to_svm(vcpu); 2993 2994 switch (msr_info->index) { 2995 case MSR_IA32_TSC: { 2996 msr_info->data = svm->vmcb->control.tsc_offset + 2997 kvm_scale_tsc(vcpu, rdtsc()); 2998 2999 break; 3000 } 3001 case MSR_STAR: 3002 msr_info->data = svm->vmcb->save.star; 3003 break; 3004#ifdef CONFIG_X86_64 3005 case MSR_LSTAR: 3006 msr_info->data = svm->vmcb->save.lstar; 3007 break; 3008 case MSR_CSTAR: 3009 msr_info->data = svm->vmcb->save.cstar; 3010 break; 3011 case MSR_KERNEL_GS_BASE: 3012 msr_info->data = svm->vmcb->save.kernel_gs_base; 3013 break; 3014 case MSR_SYSCALL_MASK: 3015 msr_info->data = svm->vmcb->save.sfmask; 3016 break; 3017#endif 3018 case MSR_IA32_SYSENTER_CS: 3019 msr_info->data = svm->vmcb->save.sysenter_cs; 3020 break; 3021 case MSR_IA32_SYSENTER_EIP: 3022 msr_info->data = svm->sysenter_eip; 3023 break; 3024 case MSR_IA32_SYSENTER_ESP: 3025 msr_info->data = svm->sysenter_esp; 3026 break; 3027 /* 3028 * Nobody will change the following 5 values in the VMCB so we can 3029 * safely return them on rdmsr. They will always be 0 until LBRV is 3030 * implemented. 
3031 */ 3032 case MSR_IA32_DEBUGCTLMSR: 3033 msr_info->data = svm->vmcb->save.dbgctl; 3034 break; 3035 case MSR_IA32_LASTBRANCHFROMIP: 3036 msr_info->data = svm->vmcb->save.br_from; 3037 break; 3038 case MSR_IA32_LASTBRANCHTOIP: 3039 msr_info->data = svm->vmcb->save.br_to; 3040 break; 3041 case MSR_IA32_LASTINTFROMIP: 3042 msr_info->data = svm->vmcb->save.last_excp_from; 3043 break; 3044 case MSR_IA32_LASTINTTOIP: 3045 msr_info->data = svm->vmcb->save.last_excp_to; 3046 break; 3047 case MSR_VM_HSAVE_PA: 3048 msr_info->data = svm->nested.hsave_msr; 3049 break; 3050 case MSR_VM_CR: 3051 msr_info->data = svm->nested.vm_cr_msr; 3052 break; 3053 case MSR_IA32_UCODE_REV: 3054 msr_info->data = 0x01000065; 3055 break; 3056 default: 3057 return kvm_get_msr_common(vcpu, msr_info); 3058 } 3059 return 0; 3060} 3061 3062static int rdmsr_interception(struct vcpu_svm *svm) 3063{ 3064 u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); 3065 struct msr_data msr_info; 3066 3067 msr_info.index = ecx; 3068 msr_info.host_initiated = false; 3069 if (svm_get_msr(&svm->vcpu, &msr_info)) { 3070 trace_kvm_msr_read_ex(ecx); 3071 kvm_inject_gp(&svm->vcpu, 0); 3072 } else { 3073 trace_kvm_msr_read(ecx, msr_info.data); 3074 3075 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, 3076 msr_info.data & 0xffffffff); 3077 kvm_register_write(&svm->vcpu, VCPU_REGS_RDX, 3078 msr_info.data >> 32); 3079 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; 3080 skip_emulated_instruction(&svm->vcpu); 3081 } 3082 return 1; 3083} 3084 3085static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data) 3086{ 3087 struct vcpu_svm *svm = to_svm(vcpu); 3088 int svm_dis, chg_mask; 3089 3090 if (data & ~SVM_VM_CR_VALID_MASK) 3091 return 1; 3092 3093 chg_mask = SVM_VM_CR_VALID_MASK; 3094 3095 if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK) 3096 chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK); 3097 3098 svm->nested.vm_cr_msr &= ~chg_mask; 3099 svm->nested.vm_cr_msr |= (data & chg_mask); 3100 3101 svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK; 3102 3103 /* check for svm_disable while efer.svme is set */ 3104 if (svm_dis && (vcpu->arch.efer & EFER_SVME)) 3105 return 1; 3106 3107 return 0; 3108} 3109 3110static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) 3111{ 3112 struct vcpu_svm *svm = to_svm(vcpu); 3113 3114 u32 ecx = msr->index; 3115 u64 data = msr->data; 3116 switch (ecx) { 3117 case MSR_IA32_TSC: 3118 kvm_write_tsc(vcpu, msr); 3119 break; 3120 case MSR_STAR: 3121 svm->vmcb->save.star = data; 3122 break; 3123#ifdef CONFIG_X86_64 3124 case MSR_LSTAR: 3125 svm->vmcb->save.lstar = data; 3126 break; 3127 case MSR_CSTAR: 3128 svm->vmcb->save.cstar = data; 3129 break; 3130 case MSR_KERNEL_GS_BASE: 3131 svm->vmcb->save.kernel_gs_base = data; 3132 break; 3133 case MSR_SYSCALL_MASK: 3134 svm->vmcb->save.sfmask = data; 3135 break; 3136#endif 3137 case MSR_IA32_SYSENTER_CS: 3138 svm->vmcb->save.sysenter_cs = data; 3139 break; 3140 case MSR_IA32_SYSENTER_EIP: 3141 svm->sysenter_eip = data; 3142 svm->vmcb->save.sysenter_eip = data; 3143 break; 3144 case MSR_IA32_SYSENTER_ESP: 3145 svm->sysenter_esp = data; 3146 svm->vmcb->save.sysenter_esp = data; 3147 break; 3148 case MSR_IA32_DEBUGCTLMSR: 3149 if (!boot_cpu_has(X86_FEATURE_LBRV)) { 3150 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", 3151 __func__, data); 3152 break; 3153 } 3154 if (data & DEBUGCTL_RESERVED_BITS) 3155 return 1; 3156 3157 svm->vmcb->save.dbgctl = data; 3158 mark_dirty(svm->vmcb, VMCB_LBR); 3159 if (data & (1ULL<<0)) 3160 
svm_enable_lbrv(svm); 3161 else 3162 svm_disable_lbrv(svm); 3163 break; 3164 case MSR_VM_HSAVE_PA: 3165 svm->nested.hsave_msr = data; 3166 break; 3167 case MSR_VM_CR: 3168 return svm_set_vm_cr(vcpu, data); 3169 case MSR_VM_IGNNE: 3170 vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); 3171 break; 3172 default: 3173 return kvm_set_msr_common(vcpu, msr); 3174 } 3175 return 0; 3176} 3177 3178static int wrmsr_interception(struct vcpu_svm *svm) 3179{ 3180 struct msr_data msr; 3181 u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); 3182 u64 data = kvm_read_edx_eax(&svm->vcpu); 3183 3184 msr.data = data; 3185 msr.index = ecx; 3186 msr.host_initiated = false; 3187 3188 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; 3189 if (kvm_set_msr(&svm->vcpu, &msr)) { 3190 trace_kvm_msr_write_ex(ecx, data); 3191 kvm_inject_gp(&svm->vcpu, 0); 3192 } else { 3193 trace_kvm_msr_write(ecx, data); 3194 skip_emulated_instruction(&svm->vcpu); 3195 } 3196 return 1; 3197} 3198 3199static int msr_interception(struct vcpu_svm *svm) 3200{ 3201 if (svm->vmcb->control.exit_info_1) 3202 return wrmsr_interception(svm); 3203 else 3204 return rdmsr_interception(svm); 3205} 3206 3207static int interrupt_window_interception(struct vcpu_svm *svm) 3208{ 3209 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); 3210 svm_clear_vintr(svm); 3211 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; 3212 mark_dirty(svm->vmcb, VMCB_INTR); 3213 ++svm->vcpu.stat.irq_window_exits; 3214 return 1; 3215} 3216 3217static int pause_interception(struct vcpu_svm *svm) 3218{ 3219 kvm_vcpu_on_spin(&(svm->vcpu)); 3220 return 1; 3221} 3222 3223static int nop_interception(struct vcpu_svm *svm) 3224{ 3225 skip_emulated_instruction(&(svm->vcpu)); 3226 return 1; 3227} 3228 3229static int monitor_interception(struct vcpu_svm *svm) 3230{ 3231 printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n"); 3232 return nop_interception(svm); 3233} 3234 3235static int mwait_interception(struct vcpu_svm *svm) 3236{ 3237 printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n"); 3238 return nop_interception(svm); 3239} 3240 3241static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { 3242 [SVM_EXIT_READ_CR0] = cr_interception, 3243 [SVM_EXIT_READ_CR3] = cr_interception, 3244 [SVM_EXIT_READ_CR4] = cr_interception, 3245 [SVM_EXIT_READ_CR8] = cr_interception, 3246 [SVM_EXIT_CR0_SEL_WRITE] = cr_interception, 3247 [SVM_EXIT_WRITE_CR0] = cr_interception, 3248 [SVM_EXIT_WRITE_CR3] = cr_interception, 3249 [SVM_EXIT_WRITE_CR4] = cr_interception, 3250 [SVM_EXIT_WRITE_CR8] = cr8_write_interception, 3251 [SVM_EXIT_READ_DR0] = dr_interception, 3252 [SVM_EXIT_READ_DR1] = dr_interception, 3253 [SVM_EXIT_READ_DR2] = dr_interception, 3254 [SVM_EXIT_READ_DR3] = dr_interception, 3255 [SVM_EXIT_READ_DR4] = dr_interception, 3256 [SVM_EXIT_READ_DR5] = dr_interception, 3257 [SVM_EXIT_READ_DR6] = dr_interception, 3258 [SVM_EXIT_READ_DR7] = dr_interception, 3259 [SVM_EXIT_WRITE_DR0] = dr_interception, 3260 [SVM_EXIT_WRITE_DR1] = dr_interception, 3261 [SVM_EXIT_WRITE_DR2] = dr_interception, 3262 [SVM_EXIT_WRITE_DR3] = dr_interception, 3263 [SVM_EXIT_WRITE_DR4] = dr_interception, 3264 [SVM_EXIT_WRITE_DR5] = dr_interception, 3265 [SVM_EXIT_WRITE_DR6] = dr_interception, 3266 [SVM_EXIT_WRITE_DR7] = dr_interception, 3267 [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, 3268 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, 3269 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, 3270 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, 3271 
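	/*
	 * #NM is only intercepted while the guest FPU state is lazily
	 * switched out; see svm_fpu_deactivate() further below.
	 */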
[SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, 3272 [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, 3273 [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception, 3274 [SVM_EXIT_INTR] = intr_interception, 3275 [SVM_EXIT_NMI] = nmi_interception, 3276 [SVM_EXIT_SMI] = nop_on_interception, 3277 [SVM_EXIT_INIT] = nop_on_interception, 3278 [SVM_EXIT_VINTR] = interrupt_window_interception, 3279 [SVM_EXIT_RDPMC] = rdpmc_interception, 3280 [SVM_EXIT_CPUID] = cpuid_interception, 3281 [SVM_EXIT_IRET] = iret_interception, 3282 [SVM_EXIT_INVD] = emulate_on_interception, 3283 [SVM_EXIT_PAUSE] = pause_interception, 3284 [SVM_EXIT_HLT] = halt_interception, 3285 [SVM_EXIT_INVLPG] = invlpg_interception, 3286 [SVM_EXIT_INVLPGA] = invlpga_interception, 3287 [SVM_EXIT_IOIO] = io_interception, 3288 [SVM_EXIT_MSR] = msr_interception, 3289 [SVM_EXIT_TASK_SWITCH] = task_switch_interception, 3290 [SVM_EXIT_SHUTDOWN] = shutdown_interception, 3291 [SVM_EXIT_VMRUN] = vmrun_interception, 3292 [SVM_EXIT_VMMCALL] = vmmcall_interception, 3293 [SVM_EXIT_VMLOAD] = vmload_interception, 3294 [SVM_EXIT_VMSAVE] = vmsave_interception, 3295 [SVM_EXIT_STGI] = stgi_interception, 3296 [SVM_EXIT_CLGI] = clgi_interception, 3297 [SVM_EXIT_SKINIT] = skinit_interception, 3298 [SVM_EXIT_WBINVD] = wbinvd_interception, 3299 [SVM_EXIT_MONITOR] = monitor_interception, 3300 [SVM_EXIT_MWAIT] = mwait_interception, 3301 [SVM_EXIT_XSETBV] = xsetbv_interception, 3302 [SVM_EXIT_NPF] = pf_interception, 3303 [SVM_EXIT_RSM] = emulate_on_interception, 3304}; 3305 3306static void dump_vmcb(struct kvm_vcpu *vcpu) 3307{ 3308 struct vcpu_svm *svm = to_svm(vcpu); 3309 struct vmcb_control_area *control = &svm->vmcb->control; 3310 struct vmcb_save_area *save = &svm->vmcb->save; 3311 3312 pr_err("VMCB Control Area:\n"); 3313 pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff); 3314 pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16); 3315 pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff); 3316 pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16); 3317 pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions); 3318 pr_err("%-20s%016llx\n", "intercepts:", control->intercept); 3319 pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count); 3320 pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa); 3321 pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa); 3322 pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset); 3323 pr_err("%-20s%d\n", "asid:", control->asid); 3324 pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl); 3325 pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl); 3326 pr_err("%-20s%08x\n", "int_vector:", control->int_vector); 3327 pr_err("%-20s%08x\n", "int_state:", control->int_state); 3328 pr_err("%-20s%08x\n", "exit_code:", control->exit_code); 3329 pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1); 3330 pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2); 3331 pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info); 3332 pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err); 3333 pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl); 3334 pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3); 3335 pr_err("%-20s%08x\n", "event_inj:", control->event_inj); 3336 pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err); 3337 pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl); 3338 pr_err("%-20s%016llx\n", "next_rip:", control->next_rip); 3339 pr_err("VMCB State Save 
Area:\n"); 3340 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3341 "es:", 3342 save->es.selector, save->es.attrib, 3343 save->es.limit, save->es.base); 3344 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3345 "cs:", 3346 save->cs.selector, save->cs.attrib, 3347 save->cs.limit, save->cs.base); 3348 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3349 "ss:", 3350 save->ss.selector, save->ss.attrib, 3351 save->ss.limit, save->ss.base); 3352 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3353 "ds:", 3354 save->ds.selector, save->ds.attrib, 3355 save->ds.limit, save->ds.base); 3356 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3357 "fs:", 3358 save->fs.selector, save->fs.attrib, 3359 save->fs.limit, save->fs.base); 3360 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3361 "gs:", 3362 save->gs.selector, save->gs.attrib, 3363 save->gs.limit, save->gs.base); 3364 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3365 "gdtr:", 3366 save->gdtr.selector, save->gdtr.attrib, 3367 save->gdtr.limit, save->gdtr.base); 3368 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3369 "ldtr:", 3370 save->ldtr.selector, save->ldtr.attrib, 3371 save->ldtr.limit, save->ldtr.base); 3372 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3373 "idtr:", 3374 save->idtr.selector, save->idtr.attrib, 3375 save->idtr.limit, save->idtr.base); 3376 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3377 "tr:", 3378 save->tr.selector, save->tr.attrib, 3379 save->tr.limit, save->tr.base); 3380 pr_err("cpl: %d efer: %016llx\n", 3381 save->cpl, save->efer); 3382 pr_err("%-15s %016llx %-13s %016llx\n", 3383 "cr0:", save->cr0, "cr2:", save->cr2); 3384 pr_err("%-15s %016llx %-13s %016llx\n", 3385 "cr3:", save->cr3, "cr4:", save->cr4); 3386 pr_err("%-15s %016llx %-13s %016llx\n", 3387 "dr6:", save->dr6, "dr7:", save->dr7); 3388 pr_err("%-15s %016llx %-13s %016llx\n", 3389 "rip:", save->rip, "rflags:", save->rflags); 3390 pr_err("%-15s %016llx %-13s %016llx\n", 3391 "rsp:", save->rsp, "rax:", save->rax); 3392 pr_err("%-15s %016llx %-13s %016llx\n", 3393 "star:", save->star, "lstar:", save->lstar); 3394 pr_err("%-15s %016llx %-13s %016llx\n", 3395 "cstar:", save->cstar, "sfmask:", save->sfmask); 3396 pr_err("%-15s %016llx %-13s %016llx\n", 3397 "kernel_gs_base:", save->kernel_gs_base, 3398 "sysenter_cs:", save->sysenter_cs); 3399 pr_err("%-15s %016llx %-13s %016llx\n", 3400 "sysenter_esp:", save->sysenter_esp, 3401 "sysenter_eip:", save->sysenter_eip); 3402 pr_err("%-15s %016llx %-13s %016llx\n", 3403 "gpat:", save->g_pat, "dbgctl:", save->dbgctl); 3404 pr_err("%-15s %016llx %-13s %016llx\n", 3405 "br_from:", save->br_from, "br_to:", save->br_to); 3406 pr_err("%-15s %016llx %-13s %016llx\n", 3407 "excp_from:", save->last_excp_from, 3408 "excp_to:", save->last_excp_to); 3409} 3410 3411static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) 3412{ 3413 struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control; 3414 3415 *info1 = control->exit_info_1; 3416 *info2 = control->exit_info_2; 3417} 3418 3419static int handle_exit(struct kvm_vcpu *vcpu) 3420{ 3421 struct vcpu_svm *svm = to_svm(vcpu); 3422 struct kvm_run *kvm_run = vcpu->run; 3423 u32 exit_code = svm->vmcb->control.exit_code; 3424 3425 trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); 3426 3427 if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) 3428 vcpu->arch.cr0 = svm->vmcb->save.cr0; 3429 if (npt_enabled) 3430 vcpu->arch.cr3 = svm->vmcb->save.cr3; 3431 3432 if (unlikely(svm->nested.exit_required)) { 3433 
nested_svm_vmexit(svm); 3434 svm->nested.exit_required = false; 3435 3436 return 1; 3437 } 3438 3439 if (is_guest_mode(vcpu)) { 3440 int vmexit; 3441 3442 trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code, 3443 svm->vmcb->control.exit_info_1, 3444 svm->vmcb->control.exit_info_2, 3445 svm->vmcb->control.exit_int_info, 3446 svm->vmcb->control.exit_int_info_err, 3447 KVM_ISA_SVM); 3448 3449 vmexit = nested_svm_exit_special(svm); 3450 3451 if (vmexit == NESTED_EXIT_CONTINUE) 3452 vmexit = nested_svm_exit_handled(svm); 3453 3454 if (vmexit == NESTED_EXIT_DONE) 3455 return 1; 3456 } 3457 3458 svm_complete_interrupts(svm); 3459 3460 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { 3461 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; 3462 kvm_run->fail_entry.hardware_entry_failure_reason 3463 = svm->vmcb->control.exit_code; 3464 pr_err("KVM: FAILED VMRUN WITH VMCB:\n"); 3465 dump_vmcb(vcpu); 3466 return 0; 3467 } 3468 3469 if (is_external_interrupt(svm->vmcb->control.exit_int_info) && 3470 exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && 3471 exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH && 3472 exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI) 3473 printk(KERN_ERR "%s: unexpected exit_int_info 0x%x " 3474 "exit_code 0x%x\n", 3475 __func__, svm->vmcb->control.exit_int_info, 3476 exit_code); 3477 3478 if (exit_code >= ARRAY_SIZE(svm_exit_handlers) 3479 || !svm_exit_handlers[exit_code]) { 3480 WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code); 3481 kvm_queue_exception(vcpu, UD_VECTOR); 3482 return 1; 3483 } 3484 3485 return svm_exit_handlers[exit_code](svm); 3486} 3487 3488static void reload_tss(struct kvm_vcpu *vcpu) 3489{ 3490 int cpu = raw_smp_processor_id(); 3491 3492 struct svm_cpu_data *sd = per_cpu(svm_data, cpu); 3493 sd->tss_desc->type = 9; /* available 32/64-bit TSS */ 3494 load_TR_desc(); 3495} 3496 3497static void pre_svm_run(struct vcpu_svm *svm) 3498{ 3499 int cpu = raw_smp_processor_id(); 3500 3501 struct svm_cpu_data *sd = per_cpu(svm_data, cpu); 3502 3503 /* FIXME: handle wraparound of asid_generation */ 3504 if (svm->asid_generation != sd->asid_generation) 3505 new_asid(svm, sd); 3506} 3507 3508static void svm_inject_nmi(struct kvm_vcpu *vcpu) 3509{ 3510 struct vcpu_svm *svm = to_svm(vcpu); 3511 3512 svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; 3513 vcpu->arch.hflags |= HF_NMI_MASK; 3514 set_intercept(svm, INTERCEPT_IRET); 3515 ++vcpu->stat.nmi_injections; 3516} 3517 3518static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) 3519{ 3520 struct vmcb_control_area *control; 3521 3522 control = &svm->vmcb->control; 3523 control->int_vector = irq; 3524 control->int_ctl &= ~V_INTR_PRIO_MASK; 3525 control->int_ctl |= V_IRQ_MASK | 3526 ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); 3527 mark_dirty(svm->vmcb, VMCB_INTR); 3528} 3529 3530static void svm_set_irq(struct kvm_vcpu *vcpu) 3531{ 3532 struct vcpu_svm *svm = to_svm(vcpu); 3533 3534 BUG_ON(!(gif_set(svm))); 3535 3536 trace_kvm_inj_virq(vcpu->arch.interrupt.nr); 3537 ++vcpu->stat.irq_injections; 3538 3539 svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr | 3540 SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR; 3541} 3542 3543static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) 3544{ 3545 struct vcpu_svm *svm = to_svm(vcpu); 3546 3547 if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) 3548 return; 3549 3550 clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); 3551 3552 if (irr == -1) 3553 return; 3554 3555 if (tpr >= irr) 3556 
		set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
}

static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
{
	return;
}

static int svm_cpu_uses_apicv(struct kvm_vcpu *vcpu)
{
	return 0;
}

static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu)
{
	return;
}

static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
{
	return;
}

static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb *vmcb = svm->vmcb;
	int ret;
	ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
	      !(svm->vcpu.arch.hflags & HF_NMI_MASK);
	ret = ret && gif_set(svm) && nested_svm_nmi(svm);

	return ret;
}

static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
}

static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (masked) {
		svm->vcpu.arch.hflags |= HF_NMI_MASK;
		set_intercept(svm, INTERCEPT_IRET);
	} else {
		svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
		clr_intercept(svm, INTERCEPT_IRET);
	}
}

static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb *vmcb = svm->vmcb;
	int ret;

	if (!gif_set(svm) ||
	    (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
		return 0;

	ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF);

	if (is_guest_mode(vcpu))
		return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);

	return ret;
}

static void enable_irq_window(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	/*
	 * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes
	 * 1, because that's a separate STGI/VMRUN intercept. The next time we
	 * get that intercept, this function will be called again though and
	 * we'll get the vintr intercept.
	 */
	if (gif_set(svm) && nested_svm_intr(svm)) {
		svm_set_vintr(svm);
		svm_inject_irq(svm, 0x0);
	}
}

static void enable_nmi_window(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
	    == HF_NMI_MASK)
		return; /* IRET will cause a vm exit */

	/*
	 * Something prevents NMI from being injected.
Single step over possible 3655 * problem (IRET or exception injection or interrupt shadow) 3656 */ 3657 svm->nmi_singlestep = true; 3658 svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); 3659} 3660 3661static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) 3662{ 3663 return 0; 3664} 3665 3666static void svm_flush_tlb(struct kvm_vcpu *vcpu) 3667{ 3668 struct vcpu_svm *svm = to_svm(vcpu); 3669 3670 if (static_cpu_has(X86_FEATURE_FLUSHBYASID)) 3671 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID; 3672 else 3673 svm->asid_generation--; 3674} 3675 3676static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) 3677{ 3678} 3679 3680static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) 3681{ 3682 struct vcpu_svm *svm = to_svm(vcpu); 3683 3684 if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) 3685 return; 3686 3687 if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) { 3688 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; 3689 kvm_set_cr8(vcpu, cr8); 3690 } 3691} 3692 3693static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) 3694{ 3695 struct vcpu_svm *svm = to_svm(vcpu); 3696 u64 cr8; 3697 3698 if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) 3699 return; 3700 3701 cr8 = kvm_get_cr8(vcpu); 3702 svm->vmcb->control.int_ctl &= ~V_TPR_MASK; 3703 svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; 3704} 3705 3706static void svm_complete_interrupts(struct vcpu_svm *svm) 3707{ 3708 u8 vector; 3709 int type; 3710 u32 exitintinfo = svm->vmcb->control.exit_int_info; 3711 unsigned int3_injected = svm->int3_injected; 3712 3713 svm->int3_injected = 0; 3714 3715 /* 3716 * If we've made progress since setting HF_IRET_MASK, we've 3717 * executed an IRET and can allow NMI injection. 3718 */ 3719 if ((svm->vcpu.arch.hflags & HF_IRET_MASK) 3720 && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) { 3721 svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); 3722 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); 3723 } 3724 3725 svm->vcpu.arch.nmi_injected = false; 3726 kvm_clear_exception_queue(&svm->vcpu); 3727 kvm_clear_interrupt_queue(&svm->vcpu); 3728 3729 if (!(exitintinfo & SVM_EXITINTINFO_VALID)) 3730 return; 3731 3732 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); 3733 3734 vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK; 3735 type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK; 3736 3737 switch (type) { 3738 case SVM_EXITINTINFO_TYPE_NMI: 3739 svm->vcpu.arch.nmi_injected = true; 3740 break; 3741 case SVM_EXITINTINFO_TYPE_EXEPT: 3742 /* 3743 * In case of software exceptions, do not reinject the vector, 3744 * but re-execute the instruction instead. Rewind RIP first 3745 * if we emulated INT3 before. 
3746 */ 3747 if (kvm_exception_is_soft(vector)) { 3748 if (vector == BP_VECTOR && int3_injected && 3749 kvm_is_linear_rip(&svm->vcpu, svm->int3_rip)) 3750 kvm_rip_write(&svm->vcpu, 3751 kvm_rip_read(&svm->vcpu) - 3752 int3_injected); 3753 break; 3754 } 3755 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { 3756 u32 err = svm->vmcb->control.exit_int_info_err; 3757 kvm_requeue_exception_e(&svm->vcpu, vector, err); 3758 3759 } else 3760 kvm_requeue_exception(&svm->vcpu, vector); 3761 break; 3762 case SVM_EXITINTINFO_TYPE_INTR: 3763 kvm_queue_interrupt(&svm->vcpu, vector, false); 3764 break; 3765 default: 3766 break; 3767 } 3768} 3769 3770static void svm_cancel_injection(struct kvm_vcpu *vcpu) 3771{ 3772 struct vcpu_svm *svm = to_svm(vcpu); 3773 struct vmcb_control_area *control = &svm->vmcb->control; 3774 3775 control->exit_int_info = control->event_inj; 3776 control->exit_int_info_err = control->event_inj_err; 3777 control->event_inj = 0; 3778 svm_complete_interrupts(svm); 3779} 3780 3781static void svm_vcpu_run(struct kvm_vcpu *vcpu) 3782{ 3783 struct vcpu_svm *svm = to_svm(vcpu); 3784 3785 svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; 3786 svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; 3787 svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; 3788 3789 /* 3790 * A vmexit emulation is required before the vcpu can be executed 3791 * again. 3792 */ 3793 if (unlikely(svm->nested.exit_required)) 3794 return; 3795 3796 pre_svm_run(svm); 3797 3798 sync_lapic_to_cr8(vcpu); 3799 3800 svm->vmcb->save.cr2 = vcpu->arch.cr2; 3801 3802 clgi(); 3803 3804 local_irq_enable(); 3805 3806 asm volatile ( 3807 "push %%" _ASM_BP "; \n\t" 3808 "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" 3809 "mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t" 3810 "mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t" 3811 "mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t" 3812 "mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t" 3813 "mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t" 3814#ifdef CONFIG_X86_64 3815 "mov %c[r8](%[svm]), %%r8 \n\t" 3816 "mov %c[r9](%[svm]), %%r9 \n\t" 3817 "mov %c[r10](%[svm]), %%r10 \n\t" 3818 "mov %c[r11](%[svm]), %%r11 \n\t" 3819 "mov %c[r12](%[svm]), %%r12 \n\t" 3820 "mov %c[r13](%[svm]), %%r13 \n\t" 3821 "mov %c[r14](%[svm]), %%r14 \n\t" 3822 "mov %c[r15](%[svm]), %%r15 \n\t" 3823#endif 3824 3825 /* Enter guest mode */ 3826 "push %%" _ASM_AX " \n\t" 3827 "mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t" 3828 __ex(SVM_VMLOAD) "\n\t" 3829 __ex(SVM_VMRUN) "\n\t" 3830 __ex(SVM_VMSAVE) "\n\t" 3831 "pop %%" _ASM_AX " \n\t" 3832 3833 /* Save guest registers, load host registers */ 3834 "mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t" 3835 "mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t" 3836 "mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t" 3837 "mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t" 3838 "mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t" 3839 "mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t" 3840#ifdef CONFIG_X86_64 3841 "mov %%r8, %c[r8](%[svm]) \n\t" 3842 "mov %%r9, %c[r9](%[svm]) \n\t" 3843 "mov %%r10, %c[r10](%[svm]) \n\t" 3844 "mov %%r11, %c[r11](%[svm]) \n\t" 3845 "mov %%r12, %c[r12](%[svm]) \n\t" 3846 "mov %%r13, %c[r13](%[svm]) \n\t" 3847 "mov %%r14, %c[r14](%[svm]) \n\t" 3848 "mov %%r15, %c[r15](%[svm]) \n\t" 3849#endif 3850 "pop %%" _ASM_BP 3851 : 3852 : [svm]"a"(svm), 3853 [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), 3854 [rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])), 3855 [rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])), 3856 [rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])), 3857 [rsi]"i"(offsetof(struct 
vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])), 3858 [rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])), 3859 [rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP])) 3860#ifdef CONFIG_X86_64 3861 , [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])), 3862 [r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])), 3863 [r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])), 3864 [r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])), 3865 [r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])), 3866 [r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])), 3867 [r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])), 3868 [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15])) 3869#endif 3870 : "cc", "memory" 3871#ifdef CONFIG_X86_64 3872 , "rbx", "rcx", "rdx", "rsi", "rdi" 3873 , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15" 3874#else 3875 , "ebx", "ecx", "edx", "esi", "edi" 3876#endif 3877 ); 3878 3879#ifdef CONFIG_X86_64 3880 wrmsrl(MSR_GS_BASE, svm->host.gs_base); 3881#else 3882 loadsegment(fs, svm->host.fs); 3883#ifndef CONFIG_X86_32_LAZY_GS 3884 loadsegment(gs, svm->host.gs); 3885#endif 3886#endif 3887 3888 reload_tss(vcpu); 3889 3890 local_irq_disable(); 3891 3892 vcpu->arch.cr2 = svm->vmcb->save.cr2; 3893 vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; 3894 vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; 3895 vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; 3896 3897 if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) 3898 kvm_before_handle_nmi(&svm->vcpu); 3899 3900 stgi(); 3901 3902 /* Any pending NMI will happen here */ 3903 3904 if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) 3905 kvm_after_handle_nmi(&svm->vcpu); 3906 3907 sync_cr8_to_lapic(vcpu); 3908 3909 svm->next_rip = 0; 3910 3911 svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; 3912 3913 /* if exit due to PF check for async PF */ 3914 if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) 3915 svm->apf_reason = kvm_read_and_reset_pf_reason(); 3916 3917 if (npt_enabled) { 3918 vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR); 3919 vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR); 3920 } 3921 3922 /* 3923 * We need to handle MC intercepts here before the vcpu has a chance to 3924 * change the physical cpu 3925 */ 3926 if (unlikely(svm->vmcb->control.exit_code == 3927 SVM_EXIT_EXCP_BASE + MC_VECTOR)) 3928 svm_handle_mce(svm); 3929 3930 mark_all_clean(svm->vmcb); 3931} 3932 3933static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) 3934{ 3935 struct vcpu_svm *svm = to_svm(vcpu); 3936 3937 svm->vmcb->save.cr3 = root; 3938 mark_dirty(svm->vmcb, VMCB_CR); 3939 svm_flush_tlb(vcpu); 3940} 3941 3942static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) 3943{ 3944 struct vcpu_svm *svm = to_svm(vcpu); 3945 3946 svm->vmcb->control.nested_cr3 = root; 3947 mark_dirty(svm->vmcb, VMCB_NPT); 3948 3949 /* Also sync guest cr3 here in case we live migrate */ 3950 svm->vmcb->save.cr3 = kvm_read_cr3(vcpu); 3951 mark_dirty(svm->vmcb, VMCB_CR); 3952 3953 svm_flush_tlb(vcpu); 3954} 3955 3956static int is_disabled(void) 3957{ 3958 u64 vm_cr; 3959 3960 rdmsrl(MSR_VM_CR, vm_cr); 3961 if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE)) 3962 return 1; 3963 3964 return 0; 3965} 3966 3967static void 3968svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) 3969{ 3970 /* 3971 * Patch in the VMMCALL instruction: 3972 */ 3973 hypercall[0] = 0x0f; 3974 
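	/* 0f 01 d9 is the three-byte VMMCALL encoding */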
static void
svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
{
	/*
	 * Patch in the VMMCALL instruction:
	 */
	hypercall[0] = 0x0f;
	hypercall[1] = 0x01;
	hypercall[2] = 0xd9;
}

static void svm_check_processor_compat(void *rtn)
{
	*(int *)rtn = 0;
}

static bool svm_cpu_has_accelerated_tpr(void)
{
	return false;
}

static bool svm_has_high_real_mode_segbase(void)
{
	return true;
}

static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
{
	return 0;
}

static void svm_cpuid_update(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	/* Update nrips enabled cache */
	svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu);
}

static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
{
	switch (func) {
	case 0x80000001:
		if (nested)
			entry->ecx |= (1 << 2); /* Set SVM bit */
		break;
	case 0x8000000A:
		entry->eax = 1; /* SVM revision 1 */
		entry->ebx = 8; /* Let's support 8 ASIDs in case we add proper
				   ASID emulation to nested SVM */
		entry->ecx = 0; /* Reserved */
		entry->edx = 0; /* By default, do not support any
				   additional features */

		/* Support next_rip if host supports it */
		if (boot_cpu_has(X86_FEATURE_NRIPS))
			entry->edx |= SVM_FEATURE_NRIP;

		/* Support NPT for the guest if enabled */
		if (npt_enabled)
			entry->edx |= SVM_FEATURE_NPT;

		break;
	}
}

static int svm_get_lpage_level(void)
{
	return PT_PDPE_LEVEL;
}

static bool svm_rdtscp_supported(void)
{
	return false;
}

static bool svm_invpcid_supported(void)
{
	return false;
}

static bool svm_mpx_supported(void)
{
	return false;
}

static bool svm_xsaves_supported(void)
{
	return false;
}

static bool svm_has_wbinvd_exit(void)
{
	return true;
}

static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	set_exception_intercept(svm, NM_VECTOR);
	update_cr0_intercept(svm);
}
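/*
 * Map the x86 emulator's intercept codes onto SVM exit codes.  The table is
 * consulted by svm_check_intercept() below to decide whether an instruction
 * being emulated on behalf of a nested guest must instead be reflected to
 * the L1 hypervisor as a #VMEXIT; PRE_EX/POST_EX/POST_MEM select the
 * emulation stage at which that check runs.
 */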
#define PRE_EX(exit)  { .exit_code = (exit), \
			.stage = X86_ICPT_PRE_EXCEPT, }
#define POST_EX(exit) { .exit_code = (exit), \
			.stage = X86_ICPT_POST_EXCEPT, }
#define POST_MEM(exit) { .exit_code = (exit), \
			.stage = X86_ICPT_POST_MEMACCESS, }

static const struct __x86_intercept {
	u32 exit_code;
	enum x86_intercept_stage stage;
} x86_intercept_map[] = {
	[x86_intercept_cr_read]		= POST_EX(SVM_EXIT_READ_CR0),
	[x86_intercept_cr_write]	= POST_EX(SVM_EXIT_WRITE_CR0),
	[x86_intercept_clts]		= POST_EX(SVM_EXIT_WRITE_CR0),
	[x86_intercept_lmsw]		= POST_EX(SVM_EXIT_WRITE_CR0),
	[x86_intercept_smsw]		= POST_EX(SVM_EXIT_READ_CR0),
	[x86_intercept_dr_read]		= POST_EX(SVM_EXIT_READ_DR0),
	[x86_intercept_dr_write]	= POST_EX(SVM_EXIT_WRITE_DR0),
	[x86_intercept_sldt]		= POST_EX(SVM_EXIT_LDTR_READ),
	[x86_intercept_str]		= POST_EX(SVM_EXIT_TR_READ),
	[x86_intercept_lldt]		= POST_EX(SVM_EXIT_LDTR_WRITE),
	[x86_intercept_ltr]		= POST_EX(SVM_EXIT_TR_WRITE),
	[x86_intercept_sgdt]		= POST_EX(SVM_EXIT_GDTR_READ),
	[x86_intercept_sidt]		= POST_EX(SVM_EXIT_IDTR_READ),
	[x86_intercept_lgdt]		= POST_EX(SVM_EXIT_GDTR_WRITE),
	[x86_intercept_lidt]		= POST_EX(SVM_EXIT_IDTR_WRITE),
	[x86_intercept_vmrun]		= POST_EX(SVM_EXIT_VMRUN),
	[x86_intercept_vmmcall]		= POST_EX(SVM_EXIT_VMMCALL),
	[x86_intercept_vmload]		= POST_EX(SVM_EXIT_VMLOAD),
	[x86_intercept_vmsave]		= POST_EX(SVM_EXIT_VMSAVE),
	[x86_intercept_stgi]		= POST_EX(SVM_EXIT_STGI),
	[x86_intercept_clgi]		= POST_EX(SVM_EXIT_CLGI),
	[x86_intercept_skinit]		= POST_EX(SVM_EXIT_SKINIT),
	[x86_intercept_invlpga]		= POST_EX(SVM_EXIT_INVLPGA),
	[x86_intercept_rdtscp]		= POST_EX(SVM_EXIT_RDTSCP),
	[x86_intercept_monitor]		= POST_MEM(SVM_EXIT_MONITOR),
	[x86_intercept_mwait]		= POST_EX(SVM_EXIT_MWAIT),
	[x86_intercept_invlpg]		= POST_EX(SVM_EXIT_INVLPG),
	[x86_intercept_invd]		= POST_EX(SVM_EXIT_INVD),
	[x86_intercept_wbinvd]		= POST_EX(SVM_EXIT_WBINVD),
	[x86_intercept_wrmsr]		= POST_EX(SVM_EXIT_MSR),
	[x86_intercept_rdtsc]		= POST_EX(SVM_EXIT_RDTSC),
	[x86_intercept_rdmsr]		= POST_EX(SVM_EXIT_MSR),
	[x86_intercept_rdpmc]		= POST_EX(SVM_EXIT_RDPMC),
	[x86_intercept_cpuid]		= PRE_EX(SVM_EXIT_CPUID),
	[x86_intercept_rsm]		= PRE_EX(SVM_EXIT_RSM),
	[x86_intercept_pause]		= PRE_EX(SVM_EXIT_PAUSE),
	[x86_intercept_pushf]		= PRE_EX(SVM_EXIT_PUSHF),
	[x86_intercept_popf]		= PRE_EX(SVM_EXIT_POPF),
	[x86_intercept_intn]		= PRE_EX(SVM_EXIT_SWINT),
	[x86_intercept_iret]		= PRE_EX(SVM_EXIT_IRET),
	[x86_intercept_icebp]		= PRE_EX(SVM_EXIT_ICEBP),
	[x86_intercept_hlt]		= POST_EX(SVM_EXIT_HLT),
	[x86_intercept_in]		= POST_EX(SVM_EXIT_IOIO),
	[x86_intercept_ins]		= POST_EX(SVM_EXIT_IOIO),
	[x86_intercept_out]		= POST_EX(SVM_EXIT_IOIO),
	[x86_intercept_outs]		= POST_EX(SVM_EXIT_IOIO),
};

#undef PRE_EX
#undef POST_EX
#undef POST_MEM

static int svm_check_intercept(struct kvm_vcpu *vcpu,
			       struct x86_instruction_info *info,
			       enum x86_intercept_stage stage)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int vmexit, ret = X86EMUL_CONTINUE;
	struct __x86_intercept icpt_info;
	struct vmcb *vmcb = svm->vmcb;

	if (info->intercept >= ARRAY_SIZE(x86_intercept_map))
		goto out;

	icpt_info = x86_intercept_map[info->intercept];

	if (stage != icpt_info.stage)
		goto out;

	switch (icpt_info.exit_code) {
	case SVM_EXIT_READ_CR0:
		if (info->intercept == x86_intercept_cr_read)
			icpt_info.exit_code += info->modrm_reg;
		break;
	case SVM_EXIT_WRITE_CR0: {
		unsigned long cr0, val;
		u64 intercept;

		if (info->intercept == x86_intercept_cr_write)
			icpt_info.exit_code += info->modrm_reg;

		if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 ||
		    info->intercept == x86_intercept_clts)
			break;

		intercept = svm->nested.intercept;

		if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
			break;

		cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
		val = info->src_val & ~SVM_CR0_SELECTIVE_MASK;

		if (info->intercept == x86_intercept_lmsw) {
			cr0 &= 0xfUL;
			val &= 0xfUL;
			/* lmsw can't clear PE - catch this here */
			if (cr0 & X86_CR0_PE)
				val |= X86_CR0_PE;
		}

		if (cr0 ^ val)
			icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;

		break;
	}
	case SVM_EXIT_READ_DR0:
	case SVM_EXIT_WRITE_DR0:
		icpt_info.exit_code += info->modrm_reg;
		break;
	case SVM_EXIT_MSR:
		if (info->intercept == x86_intercept_wrmsr)
			vmcb->control.exit_info_1 = 1;
		else
			vmcb->control.exit_info_1 = 0;
		break;
	case SVM_EXIT_PAUSE:
		/*
		 * We only get this for a NOP; PAUSE is encoded as
		 * REP NOP, so check the prefix here.
		 */
		if (info->rep_prefix != REPE_PREFIX)
			goto out;
	case SVM_EXIT_IOIO: {
		u64 exit_info;
		u32 bytes;

		if (info->intercept == x86_intercept_in ||
		    info->intercept == x86_intercept_ins) {
			exit_info = ((info->src_val & 0xffff) << 16) |
				SVM_IOIO_TYPE_MASK;
			bytes = info->dst_bytes;
		} else {
			exit_info = (info->dst_val & 0xffff) << 16;
			bytes = info->src_bytes;
		}

		if (info->intercept == x86_intercept_outs ||
		    info->intercept == x86_intercept_ins)
			exit_info |= SVM_IOIO_STR_MASK;

		if (info->rep_prefix)
			exit_info |= SVM_IOIO_REP_MASK;

		bytes = min(bytes, 4u);

		exit_info |= bytes << SVM_IOIO_SIZE_SHIFT;

		exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1);

		vmcb->control.exit_info_1 = exit_info;
		vmcb->control.exit_info_2 = info->next_rip;

		break;
	}
	default:
		break;
	}

	/* TODO: Advertise NRIPS to guest hypervisor unconditionally */
	if (static_cpu_has(X86_FEATURE_NRIPS))
		vmcb->control.next_rip = info->next_rip;
	vmcb->control.exit_code = icpt_info.exit_code;
	vmexit = nested_svm_exit_handled(svm);

	ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
					   : X86EMUL_CONTINUE;

out:
	return ret;
}
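/*
 * An external interrupt that forces a #VMEXIT is not acknowledged by the
 * CPU, so it is still pending in the interrupt controller; re-enabling
 * interrupts here lets it be delivered through the normal IDT path, and
 * unlike the VMX side no manual dispatch is needed.
 */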
static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
{
	local_irq_enable();
}

static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
}

static struct kvm_x86_ops svm_x86_ops = {
	.cpu_has_kvm_support = has_svm,
	.disabled_by_bios = is_disabled,
	.hardware_setup = svm_hardware_setup,
	.hardware_unsetup = svm_hardware_unsetup,
	.check_processor_compatibility = svm_check_processor_compat,
	.hardware_enable = svm_hardware_enable,
	.hardware_disable = svm_hardware_disable,
	.cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
	.cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase,

	.vcpu_create = svm_create_vcpu,
	.vcpu_free = svm_free_vcpu,
	.vcpu_reset = svm_vcpu_reset,

	.prepare_guest_switch = svm_prepare_guest_switch,
	.vcpu_load = svm_vcpu_load,
	.vcpu_put = svm_vcpu_put,

	.update_bp_intercept = update_bp_intercept,
	.get_msr = svm_get_msr,
	.set_msr = svm_set_msr,
	.get_segment_base = svm_get_segment_base,
	.get_segment = svm_get_segment,
	.set_segment = svm_set_segment,
	.get_cpl = svm_get_cpl,
	.get_cs_db_l_bits = kvm_get_cs_db_l_bits,
	.decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
	.decache_cr3 = svm_decache_cr3,
	.decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
	.set_cr0 = svm_set_cr0,
	.set_cr3 = svm_set_cr3,
	.set_cr4 = svm_set_cr4,
	.set_efer = svm_set_efer,
	.get_idt = svm_get_idt,
	.set_idt = svm_set_idt,
	.get_gdt = svm_get_gdt,
	.set_gdt = svm_set_gdt,
	.get_dr6 = svm_get_dr6,
	.set_dr6 = svm_set_dr6,
	.set_dr7 = svm_set_dr7,
	.sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
	.cache_reg = svm_cache_reg,
	.get_rflags = svm_get_rflags,
	.set_rflags = svm_set_rflags,
	.fpu_activate = svm_fpu_activate,
	.fpu_deactivate = svm_fpu_deactivate,

	.tlb_flush = svm_flush_tlb,

	.run = svm_vcpu_run,
	.handle_exit = handle_exit,
	.skip_emulated_instruction = skip_emulated_instruction,
	.set_interrupt_shadow = svm_set_interrupt_shadow,
	.get_interrupt_shadow = svm_get_interrupt_shadow,
	.patch_hypercall = svm_patch_hypercall,
	.set_irq = svm_set_irq,
	.set_nmi = svm_inject_nmi,
	.queue_exception = svm_queue_exception,
	.cancel_injection = svm_cancel_injection,
	.interrupt_allowed = svm_interrupt_allowed,
	.nmi_allowed = svm_nmi_allowed,
	.get_nmi_mask = svm_get_nmi_mask,
	.set_nmi_mask = svm_set_nmi_mask,
	.enable_nmi_window = enable_nmi_window,
	.enable_irq_window = enable_irq_window,
	.update_cr8_intercept = update_cr8_intercept,
	.set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
	.cpu_uses_apicv = svm_cpu_uses_apicv,
	.load_eoi_exitmap = svm_load_eoi_exitmap,
	.sync_pir_to_irr = svm_sync_pir_to_irr,

	.set_tss_addr = svm_set_tss_addr,
	.get_tdp_level = get_npt_level,
	.get_mt_mask = svm_get_mt_mask,

	.get_exit_info = svm_get_exit_info,

	.get_lpage_level = svm_get_lpage_level,

	.cpuid_update = svm_cpuid_update,

	.rdtscp_supported = svm_rdtscp_supported,
	.invpcid_supported = svm_invpcid_supported,
	.mpx_supported = svm_mpx_supported,
	.xsaves_supported = svm_xsaves_supported,

	.set_supported_cpuid = svm_set_supported_cpuid,

	.has_wbinvd_exit = svm_has_wbinvd_exit,

	.read_tsc_offset = svm_read_tsc_offset,
	.write_tsc_offset = svm_write_tsc_offset,
	.adjust_tsc_offset_guest = svm_adjust_tsc_offset_guest,
	.read_l1_tsc = svm_read_l1_tsc,

	.set_tdp_cr3 = set_tdp_cr3,

	.check_intercept = svm_check_intercept,
	.handle_external_intr = svm_handle_external_intr,

	.sched_in = svm_sched_in,

	.pmu_ops = &amd_pmu_ops,
};
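/*
 * svm_x86_ops above is the complete vendor backend: svm_init() registers it
 * with the generic KVM core, passing the size and alignment of struct
 * vcpu_svm so that kvm can allocate per-vcpu state for this backend, and
 * svm_exit() unregisters everything again via kvm_exit().
 */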
static int __init svm_init(void)
{
	return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
			__alignof__(struct vcpu_svm), THIS_MODULE);
}

static void __exit svm_exit(void)
{
	kvm_exit();
}

module_init(svm_init)
module_exit(svm_exit)
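/*
 * Usage note: this file is built as the kvm-amd module on top of the core
 * kvm module, so the backend is typically loaded with something like
 *
 *	modprobe kvm-amd npt=1 nested=1
 *
 * where npt and nested are the module parameters declared near the top of
 * this file.
 */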