1/* 2 * Kernel-based Virtual Machine driver for Linux 3 * 4 * AMD SVM support 5 * 6 * Copyright (C) 2006 Qumranet, Inc. 7 * Copyright 2010 Red Hat, Inc. and/or its affiliates. 8 * 9 * Authors: 10 * Yaniv Kamay <yaniv@qumranet.com> 11 * Avi Kivity <avi@qumranet.com> 12 * 13 * This work is licensed under the terms of the GNU GPL, version 2. See 14 * the COPYING file in the top-level directory. 15 * 16 */ 17#include <linux/kvm_host.h> 18 19#include "irq.h" 20#include "mmu.h" 21#include "kvm_cache_regs.h" 22#include "x86.h" 23#include "cpuid.h" 24 25#include <linux/module.h> 26#include <linux/mod_devicetable.h> 27#include <linux/kernel.h> 28#include <linux/vmalloc.h> 29#include <linux/highmem.h> 30#include <linux/sched.h> 31#include <linux/ftrace_event.h> 32#include <linux/slab.h> 33 34#include <asm/perf_event.h> 35#include <asm/tlbflush.h> 36#include <asm/desc.h> 37#include <asm/debugreg.h> 38#include <asm/kvm_para.h> 39 40#include <asm/virtext.h> 41#include "trace.h" 42 43#define __ex(x) __kvm_handle_fault_on_reboot(x) 44 45MODULE_AUTHOR("Qumranet"); 46MODULE_LICENSE("GPL"); 47 48static const struct x86_cpu_id svm_cpu_id[] = { 49 X86_FEATURE_MATCH(X86_FEATURE_SVM), 50 {} 51}; 52MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id); 53 54#define IOPM_ALLOC_ORDER 2 55#define MSRPM_ALLOC_ORDER 1 56 57#define SEG_TYPE_LDT 2 58#define SEG_TYPE_BUSY_TSS16 3 59 60#define SVM_FEATURE_NPT (1 << 0) 61#define SVM_FEATURE_LBRV (1 << 1) 62#define SVM_FEATURE_SVML (1 << 2) 63#define SVM_FEATURE_NRIP (1 << 3) 64#define SVM_FEATURE_TSC_RATE (1 << 4) 65#define SVM_FEATURE_VMCB_CLEAN (1 << 5) 66#define SVM_FEATURE_FLUSH_ASID (1 << 6) 67#define SVM_FEATURE_DECODE_ASSIST (1 << 7) 68#define SVM_FEATURE_PAUSE_FILTER (1 << 10) 69 70#define NESTED_EXIT_HOST 0 /* Exit handled on host level */ 71#define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ 72#define NESTED_EXIT_CONTINUE 2 /* Further checks needed */ 73 74#define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) 75 76#define TSC_RATIO_RSVD 0xffffff0000000000ULL 77#define TSC_RATIO_MIN 0x0000000000000001ULL 78#define TSC_RATIO_MAX 0x000000ffffffffffULL 79 80static bool erratum_383_found __read_mostly; 81 82static const u32 host_save_user_msrs[] = { 83#ifdef CONFIG_X86_64 84 MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, 85 MSR_FS_BASE, 86#endif 87 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, 88}; 89 90#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) 91 92struct kvm_vcpu; 93 94struct nested_state { 95 struct vmcb *hsave; 96 u64 hsave_msr; 97 u64 vm_cr_msr; 98 u64 vmcb; 99 100 /* These are the merged vectors */ 101 u32 *msrpm; 102 103 /* gpa pointers to the real vectors */ 104 u64 vmcb_msrpm; 105 u64 vmcb_iopm; 106 107 /* A VMEXIT is required but not yet emulated */ 108 bool exit_required; 109 110 /* cache for intercepts of the guest */ 111 u32 intercept_cr; 112 u32 intercept_dr; 113 u32 intercept_exceptions; 114 u64 intercept; 115 116 /* Nested Paging related state */ 117 u64 nested_cr3; 118}; 119 120#define MSRPM_OFFSETS 16 121static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; 122 123/* 124 * Set osvw_len to higher value when updated Revision Guides 125 * are published and we know what the new status bits are 126 */ 127static uint64_t osvw_len = 4, osvw_status; 128 129struct vcpu_svm { 130 struct kvm_vcpu vcpu; 131 struct vmcb *vmcb; 132 unsigned long vmcb_pa; 133 struct svm_cpu_data *svm_data; 134 uint64_t asid_generation; 135 uint64_t sysenter_esp; 136 uint64_t sysenter_eip; 137 138 u64 next_rip; 139 140 u64 
host_user_msrs[NR_HOST_SAVE_USER_MSRS]; 141 struct { 142 u16 fs; 143 u16 gs; 144 u16 ldt; 145 u64 gs_base; 146 } host; 147 148 u32 *msrpm; 149 150 ulong nmi_iret_rip; 151 152 struct nested_state nested; 153 154 bool nmi_singlestep; 155 156 unsigned int3_injected; 157 unsigned long int3_rip; 158 u32 apf_reason; 159 160 u64 tsc_ratio; 161}; 162 163static DEFINE_PER_CPU(u64, current_tsc_ratio); 164#define TSC_RATIO_DEFAULT 0x0100000000ULL 165 166#define MSR_INVALID 0xffffffffU 167 168static const struct svm_direct_access_msrs { 169 u32 index; /* Index of the MSR */ 170 bool always; /* True if intercept is always on */ 171} direct_access_msrs[] = { 172 { .index = MSR_STAR, .always = true }, 173 { .index = MSR_IA32_SYSENTER_CS, .always = true }, 174#ifdef CONFIG_X86_64 175 { .index = MSR_GS_BASE, .always = true }, 176 { .index = MSR_FS_BASE, .always = true }, 177 { .index = MSR_KERNEL_GS_BASE, .always = true }, 178 { .index = MSR_LSTAR, .always = true }, 179 { .index = MSR_CSTAR, .always = true }, 180 { .index = MSR_SYSCALL_MASK, .always = true }, 181#endif 182 { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, 183 { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, 184 { .index = MSR_IA32_LASTINTFROMIP, .always = false }, 185 { .index = MSR_IA32_LASTINTTOIP, .always = false }, 186 { .index = MSR_INVALID, .always = false }, 187}; 188 189/* enable NPT for AMD64 and X86 with PAE */ 190#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) 191static bool npt_enabled = true; 192#else 193static bool npt_enabled; 194#endif 195 196/* allow nested paging (virtualized MMU) for all guests */ 197static int npt = true; 198module_param(npt, int, S_IRUGO); 199 200/* allow nested virtualization in KVM/SVM */ 201static int nested = true; 202module_param(nested, int, S_IRUGO); 203 204static void svm_flush_tlb(struct kvm_vcpu *vcpu); 205static void svm_complete_interrupts(struct vcpu_svm *svm); 206 207static int nested_svm_exit_handled(struct vcpu_svm *svm); 208static int nested_svm_intercept(struct vcpu_svm *svm); 209static int nested_svm_vmexit(struct vcpu_svm *svm); 210static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, 211 bool has_error_code, u32 error_code); 212static u64 __scale_tsc(u64 ratio, u64 tsc); 213 214enum { 215 VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, 216 pause filter count */ 217 VMCB_PERM_MAP, /* IOPM Base and MSRPM Base */ 218 VMCB_ASID, /* ASID */ 219 VMCB_INTR, /* int_ctl, int_vector */ 220 VMCB_NPT, /* npt_en, nCR3, gPAT */ 221 VMCB_CR, /* CR0, CR3, CR4, EFER */ 222 VMCB_DR, /* DR6, DR7 */ 223 VMCB_DT, /* GDT, IDT */ 224 VMCB_SEG, /* CS, DS, SS, ES, CPL */ 225 VMCB_CR2, /* CR2 only */ 226 VMCB_LBR, /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */ 227 VMCB_DIRTY_MAX, 228}; 229 230/* TPR and CR2 are always written before VMRUN */ 231#define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2)) 232 233static inline void mark_all_dirty(struct vmcb *vmcb) 234{ 235 vmcb->control.clean = 0; 236} 237 238static inline void mark_all_clean(struct vmcb *vmcb) 239{ 240 vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1) 241 & ~VMCB_ALWAYS_DIRTY_MASK; 242} 243 244static inline void mark_dirty(struct vmcb *vmcb, int bit) 245{ 246 vmcb->control.clean &= ~(1 << bit); 247} 248 249static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) 250{ 251 return container_of(vcpu, struct vcpu_svm, vcpu); 252} 253 254static void recalc_intercepts(struct vcpu_svm *svm) 255{ 256 struct vmcb_control_area *c, *h; 257 struct nested_state *g; 258 259 
mark_dirty(svm->vmcb, VMCB_INTERCEPTS); 260 261 if (!is_guest_mode(&svm->vcpu)) 262 return; 263 264 c = &svm->vmcb->control; 265 h = &svm->nested.hsave->control; 266 g = &svm->nested; 267 268 c->intercept_cr = h->intercept_cr | g->intercept_cr; 269 c->intercept_dr = h->intercept_dr | g->intercept_dr; 270 c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions; 271 c->intercept = h->intercept | g->intercept; 272} 273 274static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm) 275{ 276 if (is_guest_mode(&svm->vcpu)) 277 return svm->nested.hsave; 278 else 279 return svm->vmcb; 280} 281 282static inline void set_cr_intercept(struct vcpu_svm *svm, int bit) 283{ 284 struct vmcb *vmcb = get_host_vmcb(svm); 285 286 vmcb->control.intercept_cr |= (1U << bit); 287 288 recalc_intercepts(svm); 289} 290 291static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit) 292{ 293 struct vmcb *vmcb = get_host_vmcb(svm); 294 295 vmcb->control.intercept_cr &= ~(1U << bit); 296 297 recalc_intercepts(svm); 298} 299 300static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit) 301{ 302 struct vmcb *vmcb = get_host_vmcb(svm); 303 304 return vmcb->control.intercept_cr & (1U << bit); 305} 306 307static inline void set_dr_intercepts(struct vcpu_svm *svm) 308{ 309 struct vmcb *vmcb = get_host_vmcb(svm); 310 311 vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ) 312 | (1 << INTERCEPT_DR1_READ) 313 | (1 << INTERCEPT_DR2_READ) 314 | (1 << INTERCEPT_DR3_READ) 315 | (1 << INTERCEPT_DR4_READ) 316 | (1 << INTERCEPT_DR5_READ) 317 | (1 << INTERCEPT_DR6_READ) 318 | (1 << INTERCEPT_DR7_READ) 319 | (1 << INTERCEPT_DR0_WRITE) 320 | (1 << INTERCEPT_DR1_WRITE) 321 | (1 << INTERCEPT_DR2_WRITE) 322 | (1 << INTERCEPT_DR3_WRITE) 323 | (1 << INTERCEPT_DR4_WRITE) 324 | (1 << INTERCEPT_DR5_WRITE) 325 | (1 << INTERCEPT_DR6_WRITE) 326 | (1 << INTERCEPT_DR7_WRITE); 327 328 recalc_intercepts(svm); 329} 330 331static inline void clr_dr_intercepts(struct vcpu_svm *svm) 332{ 333 struct vmcb *vmcb = get_host_vmcb(svm); 334 335 vmcb->control.intercept_dr = 0; 336 337 recalc_intercepts(svm); 338} 339 340static inline void set_exception_intercept(struct vcpu_svm *svm, int bit) 341{ 342 struct vmcb *vmcb = get_host_vmcb(svm); 343 344 vmcb->control.intercept_exceptions |= (1U << bit); 345 346 recalc_intercepts(svm); 347} 348 349static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit) 350{ 351 struct vmcb *vmcb = get_host_vmcb(svm); 352 353 vmcb->control.intercept_exceptions &= ~(1U << bit); 354 355 recalc_intercepts(svm); 356} 357 358static inline void set_intercept(struct vcpu_svm *svm, int bit) 359{ 360 struct vmcb *vmcb = get_host_vmcb(svm); 361 362 vmcb->control.intercept |= (1ULL << bit); 363 364 recalc_intercepts(svm); 365} 366 367static inline void clr_intercept(struct vcpu_svm *svm, int bit) 368{ 369 struct vmcb *vmcb = get_host_vmcb(svm); 370 371 vmcb->control.intercept &= ~(1ULL << bit); 372 373 recalc_intercepts(svm); 374} 375 376static inline void enable_gif(struct vcpu_svm *svm) 377{ 378 svm->vcpu.arch.hflags |= HF_GIF_MASK; 379} 380 381static inline void disable_gif(struct vcpu_svm *svm) 382{ 383 svm->vcpu.arch.hflags &= ~HF_GIF_MASK; 384} 385 386static inline bool gif_set(struct vcpu_svm *svm) 387{ 388 return !!(svm->vcpu.arch.hflags & HF_GIF_MASK); 389} 390 391static unsigned long iopm_base; 392 393struct kvm_ldttss_desc { 394 u16 limit0; 395 u16 base0; 396 unsigned base1:8, type:5, dpl:2, p:1; 397 unsigned limit1:4, zero0:3, g:1, base2:8; 398 u32 base3; 399 u32 zero1; 400} 
__attribute__((packed)); 401 402struct svm_cpu_data { 403 int cpu; 404 405 u64 asid_generation; 406 u32 max_asid; 407 u32 next_asid; 408 struct kvm_ldttss_desc *tss_desc; 409 410 struct page *save_area; 411}; 412 413static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); 414 415struct svm_init_data { 416 int cpu; 417 int r; 418}; 419 420static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; 421 422#define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges) 423#define MSRS_RANGE_SIZE 2048 424#define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2) 425 426static u32 svm_msrpm_offset(u32 msr) 427{ 428 u32 offset; 429 int i; 430 431 for (i = 0; i < NUM_MSR_MAPS; i++) { 432 if (msr < msrpm_ranges[i] || 433 msr >= msrpm_ranges[i] + MSRS_IN_RANGE) 434 continue; 435 436 offset = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */ 437 offset += (i * MSRS_RANGE_SIZE); /* add range offset */ 438 439 /* Now we have the u8 offset - but need the u32 offset */ 440 return offset / 4; 441 } 442 443 /* MSR not in any range */ 444 return MSR_INVALID; 445} 446 447#define MAX_INST_SIZE 15 448 449static inline void clgi(void) 450{ 451 asm volatile (__ex(SVM_CLGI)); 452} 453 454static inline void stgi(void) 455{ 456 asm volatile (__ex(SVM_STGI)); 457} 458 459static inline void invlpga(unsigned long addr, u32 asid) 460{ 461 asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid)); 462} 463 464static int get_npt_level(void) 465{ 466#ifdef CONFIG_X86_64 467 return PT64_ROOT_LEVEL; 468#else 469 return PT32E_ROOT_LEVEL; 470#endif 471} 472 473static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) 474{ 475 vcpu->arch.efer = efer; 476 if (!npt_enabled && !(efer & EFER_LMA)) 477 efer &= ~EFER_LME; 478 479 to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; 480 mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); 481} 482 483static int is_external_interrupt(u32 info) 484{ 485 info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; 486 return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); 487} 488 489static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu) 490{ 491 struct vcpu_svm *svm = to_svm(vcpu); 492 u32 ret = 0; 493 494 if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) 495 ret = KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS; 496 return ret; 497} 498 499static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) 500{ 501 struct vcpu_svm *svm = to_svm(vcpu); 502 503 if (mask == 0) 504 svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; 505 else 506 svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK; 507 508} 509 510static void skip_emulated_instruction(struct kvm_vcpu *vcpu) 511{ 512 struct vcpu_svm *svm = to_svm(vcpu); 513 514 if (svm->vmcb->control.next_rip != 0) { 515 WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS)); 516 svm->next_rip = svm->vmcb->control.next_rip; 517 } 518 519 if (!svm->next_rip) { 520 if (emulate_instruction(vcpu, EMULTYPE_SKIP) != 521 EMULATE_DONE) 522 printk(KERN_DEBUG "%s: NOP\n", __func__); 523 return; 524 } 525 if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE) 526 printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n", 527 __func__, kvm_rip_read(vcpu), svm->next_rip); 528 529 kvm_rip_write(vcpu, svm->next_rip); 530 svm_set_interrupt_shadow(vcpu, 0); 531} 532 533static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, 534 bool has_error_code, u32 error_code, 535 bool reinject) 536{ 537 struct vcpu_svm *svm = to_svm(vcpu); 538 539 /* 540 * If we are within a nested VM we'd better #VMEXIT and let the guest 541 * handle the exception 542 */ 543 if (!reinject && 544 
nested_svm_check_exception(svm, nr, has_error_code, error_code)) 545 return; 546 547 if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) { 548 unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu); 549 550 /* 551 * For guest debugging where we have to reinject #BP if some 552 * INT3 is guest-owned: 553 * Emulate nRIP by moving RIP forward. Will fail if injection 554 * raises a fault that is not intercepted. Still better than 555 * failing in all cases. 556 */ 557 skip_emulated_instruction(&svm->vcpu); 558 rip = kvm_rip_read(&svm->vcpu); 559 svm->int3_rip = rip + svm->vmcb->save.cs.base; 560 svm->int3_injected = rip - old_rip; 561 } 562 563 svm->vmcb->control.event_inj = nr 564 | SVM_EVTINJ_VALID 565 | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) 566 | SVM_EVTINJ_TYPE_EXEPT; 567 svm->vmcb->control.event_inj_err = error_code; 568} 569 570static void svm_init_erratum_383(void) 571{ 572 u32 low, high; 573 int err; 574 u64 val; 575 576 if (!static_cpu_has_bug(X86_BUG_AMD_TLB_MMATCH)) 577 return; 578 579 /* Use _safe variants to not break nested virtualization */ 580 val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err); 581 if (err) 582 return; 583 584 val |= (1ULL << 47); 585 586 low = lower_32_bits(val); 587 high = upper_32_bits(val); 588 589 native_write_msr_safe(MSR_AMD64_DC_CFG, low, high); 590 591 erratum_383_found = true; 592} 593 594static void svm_init_osvw(struct kvm_vcpu *vcpu) 595{ 596 /* 597 * Guests should see errata 400 and 415 as fixed (assuming that 598 * HLT and IO instructions are intercepted). 599 */ 600 vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3; 601 vcpu->arch.osvw.status = osvw_status & ~(6ULL); 602 603 /* 604 * By increasing VCPU's osvw.length to 3 we are telling the guest that 605 * all osvw.status bits inside that length, including bit 0 (which is 606 * reserved for erratum 298), are valid. However, if host processor's 607 * osvw_len is 0 then osvw_status[0] carries no information. We need to 608 * be conservative here and therefore we tell the guest that erratum 298 609 * is present (because we really don't know). 
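	 * (OSVW is the "OS Visible Workaround" mechanism: MSR_AMD64_OSVW_ID_LENGTH
	 * and MSR_AMD64_OSVW_STATUS advertise which errata the OS still has to
	 * work around on this processor.)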
610 */ 611 if (osvw_len == 0 && boot_cpu_data.x86 == 0x10) 612 vcpu->arch.osvw.status |= 1; 613} 614 615static int has_svm(void) 616{ 617 const char *msg; 618 619 if (!cpu_has_svm(&msg)) { 620 printk(KERN_INFO "has_svm: %s\n", msg); 621 return 0; 622 } 623 624 return 1; 625} 626 627static void svm_hardware_disable(void) 628{ 629 /* Make sure we clean up behind us */ 630 if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) 631 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); 632 633 cpu_svm_disable(); 634 635 amd_pmu_disable_virt(); 636} 637 638static int svm_hardware_enable(void) 639{ 640 641 struct svm_cpu_data *sd; 642 uint64_t efer; 643 struct desc_ptr gdt_descr; 644 struct desc_struct *gdt; 645 int me = raw_smp_processor_id(); 646 647 rdmsrl(MSR_EFER, efer); 648 if (efer & EFER_SVME) 649 return -EBUSY; 650 651 if (!has_svm()) { 652 pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me); 653 return -EINVAL; 654 } 655 sd = per_cpu(svm_data, me); 656 if (!sd) { 657 pr_err("%s: svm_data is NULL on %d\n", __func__, me); 658 return -EINVAL; 659 } 660 661 sd->asid_generation = 1; 662 sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; 663 sd->next_asid = sd->max_asid + 1; 664 665 native_store_gdt(&gdt_descr); 666 gdt = (struct desc_struct *)gdt_descr.address; 667 sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); 668 669 wrmsrl(MSR_EFER, efer | EFER_SVME); 670 671 wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT); 672 673 if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { 674 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); 675 __this_cpu_write(current_tsc_ratio, TSC_RATIO_DEFAULT); 676 } 677 678 679 /* 680 * Get OSVW bits. 681 * 682 * Note that it is possible to have a system with mixed processor 683 * revisions and therefore different OSVW bits. If bits are not the same 684 * on different processors then choose the worst case (i.e. if erratum 685 * is present on one processor and not on another then assume that the 686 * erratum is present everywhere). 
687 */ 688 if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) { 689 uint64_t len, status = 0; 690 int err; 691 692 len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err); 693 if (!err) 694 status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS, 695 &err); 696 697 if (err) 698 osvw_status = osvw_len = 0; 699 else { 700 if (len < osvw_len) 701 osvw_len = len; 702 osvw_status |= status; 703 osvw_status &= (1ULL << osvw_len) - 1; 704 } 705 } else 706 osvw_status = osvw_len = 0; 707 708 svm_init_erratum_383(); 709 710 amd_pmu_enable_virt(); 711 712 return 0; 713} 714 715static void svm_cpu_uninit(int cpu) 716{ 717 struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id()); 718 719 if (!sd) 720 return; 721 722 per_cpu(svm_data, raw_smp_processor_id()) = NULL; 723 __free_page(sd->save_area); 724 kfree(sd); 725} 726 727static int svm_cpu_init(int cpu) 728{ 729 struct svm_cpu_data *sd; 730 int r; 731 732 sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL); 733 if (!sd) 734 return -ENOMEM; 735 sd->cpu = cpu; 736 sd->save_area = alloc_page(GFP_KERNEL); 737 r = -ENOMEM; 738 if (!sd->save_area) 739 goto err_1; 740 741 per_cpu(svm_data, cpu) = sd; 742 743 return 0; 744 745err_1: 746 kfree(sd); 747 return r; 748 749} 750 751static bool valid_msr_intercept(u32 index) 752{ 753 int i; 754 755 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) 756 if (direct_access_msrs[i].index == index) 757 return true; 758 759 return false; 760} 761 762static void set_msr_interception(u32 *msrpm, unsigned msr, 763 int read, int write) 764{ 765 u8 bit_read, bit_write; 766 unsigned long tmp; 767 u32 offset; 768 769 /* 770 * If this warning triggers extend the direct_access_msrs list at the 771 * beginning of the file 772 */ 773 WARN_ON(!valid_msr_intercept(msr)); 774 775 offset = svm_msrpm_offset(msr); 776 bit_read = 2 * (msr & 0x0f); 777 bit_write = 2 * (msr & 0x0f) + 1; 778 tmp = msrpm[offset]; 779 780 BUG_ON(offset == MSR_INVALID); 781 782 read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp); 783 write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp); 784 785 msrpm[offset] = tmp; 786} 787 788static void svm_vcpu_init_msrpm(u32 *msrpm) 789{ 790 int i; 791 792 memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); 793 794 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { 795 if (!direct_access_msrs[i].always) 796 continue; 797 798 set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1); 799 } 800} 801 802static void add_msr_offset(u32 offset) 803{ 804 int i; 805 806 for (i = 0; i < MSRPM_OFFSETS; ++i) { 807 808 /* Offset already in list? */ 809 if (msrpm_offsets[i] == offset) 810 return; 811 812 /* Slot used by another offset? */ 813 if (msrpm_offsets[i] != MSR_INVALID) 814 continue; 815 816 /* Add offset to list */ 817 msrpm_offsets[i] = offset; 818 819 return; 820 } 821 822 /* 823 * If this BUG triggers the msrpm_offsets table has an overflow. Just 824 * increase MSRPM_OFFSETS in this case. 
	 */
	BUG();
}

static void init_msrpm_offsets(void)
{
	int i;

	memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));

	for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
		u32 offset;

		offset = svm_msrpm_offset(direct_access_msrs[i].index);
		BUG_ON(offset == MSR_INVALID);

		add_msr_offset(offset);
	}
}

static void svm_enable_lbrv(struct vcpu_svm *svm)
{
	u32 *msrpm = svm->msrpm;

	svm->vmcb->control.lbr_ctl = 1;
	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
	set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
}

static void svm_disable_lbrv(struct vcpu_svm *svm)
{
	u32 *msrpm = svm->msrpm;

	svm->vmcb->control.lbr_ctl = 0;
	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
	set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
}

static __init int svm_hardware_setup(void)
{
	int cpu;
	struct page *iopm_pages;
	void *iopm_va;
	int r;

	iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);

	if (!iopm_pages)
		return -ENOMEM;

	iopm_va = page_address(iopm_pages);
	memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
	iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;

	init_msrpm_offsets();

	if (boot_cpu_has(X86_FEATURE_NX))
		kvm_enable_efer_bits(EFER_NX);

	if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
		kvm_enable_efer_bits(EFER_FFXSR);

	if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
		u64 max;

		kvm_has_tsc_control = true;

		/*
		 * Make sure the user can only configure tsc_khz values that
		 * fit into a signed integer.
		 * A min value is not needed because it will always be 1 on
		 * all machines, and a value of 0 is used to disable
		 * tsc-scaling for the vcpu.
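		 * (The ratio programmed into MSR_AMD64_TSC_RATIO is 8.32 fixed
		 * point, so TSC_RATIO_DEFAULT (1ULL << 32) is a multiplier of
		 * 1.0 and TSC_RATIO_MAX is just under 256; the cap below keeps
		 * the resulting guest tsc_khz within a signed 32-bit integer.)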
902 */ 903 max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX)); 904 905 kvm_max_guest_tsc_khz = max; 906 } 907 908 if (nested) { 909 printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); 910 kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); 911 } 912 913 for_each_possible_cpu(cpu) { 914 r = svm_cpu_init(cpu); 915 if (r) 916 goto err; 917 } 918 919 if (!boot_cpu_has(X86_FEATURE_NPT)) 920 npt_enabled = false; 921 922 if (npt_enabled && !npt) { 923 printk(KERN_INFO "kvm: Nested Paging disabled\n"); 924 npt_enabled = false; 925 } 926 927 if (npt_enabled) { 928 printk(KERN_INFO "kvm: Nested Paging enabled\n"); 929 kvm_enable_tdp(); 930 } else 931 kvm_disable_tdp(); 932 933 return 0; 934 935err: 936 __free_pages(iopm_pages, IOPM_ALLOC_ORDER); 937 iopm_base = 0; 938 return r; 939} 940 941static __exit void svm_hardware_unsetup(void) 942{ 943 int cpu; 944 945 for_each_possible_cpu(cpu) 946 svm_cpu_uninit(cpu); 947 948 __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); 949 iopm_base = 0; 950} 951 952static void init_seg(struct vmcb_seg *seg) 953{ 954 seg->selector = 0; 955 seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | 956 SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */ 957 seg->limit = 0xffff; 958 seg->base = 0; 959} 960 961static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) 962{ 963 seg->selector = 0; 964 seg->attrib = SVM_SELECTOR_P_MASK | type; 965 seg->limit = 0xffff; 966 seg->base = 0; 967} 968 969static u64 __scale_tsc(u64 ratio, u64 tsc) 970{ 971 u64 mult, frac, _tsc; 972 973 mult = ratio >> 32; 974 frac = ratio & ((1ULL << 32) - 1); 975 976 _tsc = tsc; 977 _tsc *= mult; 978 _tsc += (tsc >> 32) * frac; 979 _tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32; 980 981 return _tsc; 982} 983 984static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) 985{ 986 struct vcpu_svm *svm = to_svm(vcpu); 987 u64 _tsc = tsc; 988 989 if (svm->tsc_ratio != TSC_RATIO_DEFAULT) 990 _tsc = __scale_tsc(svm->tsc_ratio, tsc); 991 992 return _tsc; 993} 994 995static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) 996{ 997 struct vcpu_svm *svm = to_svm(vcpu); 998 u64 ratio; 999 u64 khz; 1000 1001 /* Guest TSC same frequency as host TSC? */ 1002 if (!scale) { 1003 svm->tsc_ratio = TSC_RATIO_DEFAULT; 1004 return; 1005 } 1006 1007 /* TSC scaling supported? 
*/ 1008 if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { 1009 if (user_tsc_khz > tsc_khz) { 1010 vcpu->arch.tsc_catchup = 1; 1011 vcpu->arch.tsc_always_catchup = 1; 1012 } else 1013 WARN(1, "user requested TSC rate below hardware speed\n"); 1014 return; 1015 } 1016 1017 khz = user_tsc_khz; 1018 1019 /* TSC scaling required - calculate ratio */ 1020 ratio = khz << 32; 1021 do_div(ratio, tsc_khz); 1022 1023 if (ratio == 0 || ratio & TSC_RATIO_RSVD) { 1024 WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n", 1025 user_tsc_khz); 1026 return; 1027 } 1028 svm->tsc_ratio = ratio; 1029} 1030 1031static u64 svm_read_tsc_offset(struct kvm_vcpu *vcpu) 1032{ 1033 struct vcpu_svm *svm = to_svm(vcpu); 1034 1035 return svm->vmcb->control.tsc_offset; 1036} 1037 1038static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) 1039{ 1040 struct vcpu_svm *svm = to_svm(vcpu); 1041 u64 g_tsc_offset = 0; 1042 1043 if (is_guest_mode(vcpu)) { 1044 g_tsc_offset = svm->vmcb->control.tsc_offset - 1045 svm->nested.hsave->control.tsc_offset; 1046 svm->nested.hsave->control.tsc_offset = offset; 1047 } else 1048 trace_kvm_write_tsc_offset(vcpu->vcpu_id, 1049 svm->vmcb->control.tsc_offset, 1050 offset); 1051 1052 svm->vmcb->control.tsc_offset = offset + g_tsc_offset; 1053 1054 mark_dirty(svm->vmcb, VMCB_INTERCEPTS); 1055} 1056 1057static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host) 1058{ 1059 struct vcpu_svm *svm = to_svm(vcpu); 1060 1061 if (host) { 1062 if (svm->tsc_ratio != TSC_RATIO_DEFAULT) 1063 WARN_ON(adjustment < 0); 1064 adjustment = svm_scale_tsc(vcpu, (u64)adjustment); 1065 } 1066 1067 svm->vmcb->control.tsc_offset += adjustment; 1068 if (is_guest_mode(vcpu)) 1069 svm->nested.hsave->control.tsc_offset += adjustment; 1070 else 1071 trace_kvm_write_tsc_offset(vcpu->vcpu_id, 1072 svm->vmcb->control.tsc_offset - adjustment, 1073 svm->vmcb->control.tsc_offset); 1074 1075 mark_dirty(svm->vmcb, VMCB_INTERCEPTS); 1076} 1077 1078static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) 1079{ 1080 u64 tsc; 1081 1082 tsc = svm_scale_tsc(vcpu, native_read_tsc()); 1083 1084 return target_tsc - tsc; 1085} 1086 1087static void init_vmcb(struct vcpu_svm *svm) 1088{ 1089 struct vmcb_control_area *control = &svm->vmcb->control; 1090 struct vmcb_save_area *save = &svm->vmcb->save; 1091 1092 svm->vcpu.fpu_active = 1; 1093 svm->vcpu.arch.hflags = 0; 1094 1095 set_cr_intercept(svm, INTERCEPT_CR0_READ); 1096 set_cr_intercept(svm, INTERCEPT_CR3_READ); 1097 set_cr_intercept(svm, INTERCEPT_CR4_READ); 1098 set_cr_intercept(svm, INTERCEPT_CR0_WRITE); 1099 set_cr_intercept(svm, INTERCEPT_CR3_WRITE); 1100 set_cr_intercept(svm, INTERCEPT_CR4_WRITE); 1101 set_cr_intercept(svm, INTERCEPT_CR8_WRITE); 1102 1103 set_dr_intercepts(svm); 1104 1105 set_exception_intercept(svm, PF_VECTOR); 1106 set_exception_intercept(svm, UD_VECTOR); 1107 set_exception_intercept(svm, MC_VECTOR); 1108 set_exception_intercept(svm, AC_VECTOR); 1109 set_exception_intercept(svm, DB_VECTOR); 1110 1111 set_intercept(svm, INTERCEPT_INTR); 1112 set_intercept(svm, INTERCEPT_NMI); 1113 set_intercept(svm, INTERCEPT_SMI); 1114 set_intercept(svm, INTERCEPT_SELECTIVE_CR0); 1115 set_intercept(svm, INTERCEPT_RDPMC); 1116 set_intercept(svm, INTERCEPT_CPUID); 1117 set_intercept(svm, INTERCEPT_INVD); 1118 set_intercept(svm, INTERCEPT_HLT); 1119 set_intercept(svm, INTERCEPT_INVLPG); 1120 set_intercept(svm, INTERCEPT_INVLPGA); 1121 set_intercept(svm, INTERCEPT_IOIO_PROT); 1122 set_intercept(svm, INTERCEPT_MSR_PROT); 1123 
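	/*
	 * With INTERCEPT_IOIO_PROT and INTERCEPT_MSR_PROT set, I/O port and
	 * MSR accesses are filtered through the permission bitmaps installed
	 * via iopm_base_pa and msrpm_base_pa below rather than exiting
	 * unconditionally.
	 */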
set_intercept(svm, INTERCEPT_TASK_SWITCH); 1124 set_intercept(svm, INTERCEPT_SHUTDOWN); 1125 set_intercept(svm, INTERCEPT_VMRUN); 1126 set_intercept(svm, INTERCEPT_VMMCALL); 1127 set_intercept(svm, INTERCEPT_VMLOAD); 1128 set_intercept(svm, INTERCEPT_VMSAVE); 1129 set_intercept(svm, INTERCEPT_STGI); 1130 set_intercept(svm, INTERCEPT_CLGI); 1131 set_intercept(svm, INTERCEPT_SKINIT); 1132 set_intercept(svm, INTERCEPT_WBINVD); 1133 set_intercept(svm, INTERCEPT_MONITOR); 1134 set_intercept(svm, INTERCEPT_MWAIT); 1135 set_intercept(svm, INTERCEPT_XSETBV); 1136 1137 control->iopm_base_pa = iopm_base; 1138 control->msrpm_base_pa = __pa(svm->msrpm); 1139 control->int_ctl = V_INTR_MASKING_MASK; 1140 1141 init_seg(&save->es); 1142 init_seg(&save->ss); 1143 init_seg(&save->ds); 1144 init_seg(&save->fs); 1145 init_seg(&save->gs); 1146 1147 save->cs.selector = 0xf000; 1148 save->cs.base = 0xffff0000; 1149 /* Executable/Readable Code Segment */ 1150 save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK | 1151 SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK; 1152 save->cs.limit = 0xffff; 1153 1154 save->gdtr.limit = 0xffff; 1155 save->idtr.limit = 0xffff; 1156 1157 init_sys_seg(&save->ldtr, SEG_TYPE_LDT); 1158 init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); 1159 1160 svm_set_efer(&svm->vcpu, 0); 1161 save->dr6 = 0xffff0ff0; 1162 kvm_set_rflags(&svm->vcpu, 2); 1163 save->rip = 0x0000fff0; 1164 svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; 1165 1166 /* 1167 * This is the guest-visible cr0 value. 1168 * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. 1169 */ 1170 svm->vcpu.arch.cr0 = 0; 1171 (void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); 1172 1173 save->cr4 = X86_CR4_PAE; 1174 /* rdx = ?? */ 1175 1176 if (npt_enabled) { 1177 /* Setup VMCB for Nested Paging */ 1178 control->nested_ctl = 1; 1179 clr_intercept(svm, INTERCEPT_INVLPG); 1180 clr_exception_intercept(svm, PF_VECTOR); 1181 clr_cr_intercept(svm, INTERCEPT_CR3_READ); 1182 clr_cr_intercept(svm, INTERCEPT_CR3_WRITE); 1183 save->g_pat = 0x0007040600070406ULL; 1184 save->cr3 = 0; 1185 save->cr4 = 0; 1186 } 1187 svm->asid_generation = 0; 1188 1189 svm->nested.vmcb = 0; 1190 svm->vcpu.arch.hflags = 0; 1191 1192 if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) { 1193 control->pause_filter_count = 3000; 1194 set_intercept(svm, INTERCEPT_PAUSE); 1195 } 1196 1197 mark_all_dirty(svm->vmcb); 1198 1199 enable_gif(svm); 1200} 1201 1202static void svm_vcpu_reset(struct kvm_vcpu *vcpu) 1203{ 1204 struct vcpu_svm *svm = to_svm(vcpu); 1205 u32 dummy; 1206 u32 eax = 1; 1207 1208 init_vmcb(svm); 1209 1210 kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); 1211 kvm_register_write(vcpu, VCPU_REGS_RDX, eax); 1212} 1213 1214static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) 1215{ 1216 struct vcpu_svm *svm; 1217 struct page *page; 1218 struct page *msrpm_pages; 1219 struct page *hsave_page; 1220 struct page *nested_msrpm_pages; 1221 int err; 1222 1223 svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); 1224 if (!svm) { 1225 err = -ENOMEM; 1226 goto out; 1227 } 1228 1229 svm->tsc_ratio = TSC_RATIO_DEFAULT; 1230 1231 err = kvm_vcpu_init(&svm->vcpu, kvm, id); 1232 if (err) 1233 goto free_svm; 1234 1235 err = -ENOMEM; 1236 page = alloc_page(GFP_KERNEL); 1237 if (!page) 1238 goto uninit; 1239 1240 msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); 1241 if (!msrpm_pages) 1242 goto free_page1; 1243 1244 nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); 1245 if (!nested_msrpm_pages) 1246 goto free_page2; 
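
	/*
	 * The hsave page plays the role of the host state-save area for the
	 * nested case: it keeps the guest hypervisor's VMCB state while its
	 * nested guest runs, so nested_svm_vmexit() can restore it later.
	 */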
	hsave_page = alloc_page(GFP_KERNEL);
	if (!hsave_page)
		goto free_page3;

	svm->nested.hsave = page_address(hsave_page);

	svm->msrpm = page_address(msrpm_pages);
	svm_vcpu_init_msrpm(svm->msrpm);

	svm->nested.msrpm = page_address(nested_msrpm_pages);
	svm_vcpu_init_msrpm(svm->nested.msrpm);

	svm->vmcb = page_address(page);
	clear_page(svm->vmcb);
	svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
	svm->asid_generation = 0;
	init_vmcb(svm);

	svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
				   MSR_IA32_APICBASE_ENABLE;
	if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
		svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;

	svm_init_osvw(&svm->vcpu);

	return &svm->vcpu;

free_page3:
	__free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
free_page2:
	__free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
free_page1:
	__free_page(page);
uninit:
	kvm_vcpu_uninit(&svm->vcpu);
free_svm:
	kmem_cache_free(kvm_vcpu_cache, svm);
out:
	return ERR_PTR(err);
}

static void svm_free_vcpu(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	__free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
	__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
	__free_page(virt_to_page(svm->nested.hsave));
	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, svm);
}

static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int i;

	if (unlikely(cpu != vcpu->cpu)) {
		svm->asid_generation = 0;
		mark_all_dirty(svm->vmcb);
	}

#ifdef CONFIG_X86_64
	rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
#endif
	savesegment(fs, svm->host.fs);
	savesegment(gs, svm->host.gs);
	svm->host.ldt = kvm_read_ldt();

	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
		rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);

	if (static_cpu_has(X86_FEATURE_TSCRATEMSR) &&
	    svm->tsc_ratio != __this_cpu_read(current_tsc_ratio)) {
		__this_cpu_write(current_tsc_ratio, svm->tsc_ratio);
		wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio);
	}
}

static void svm_vcpu_put(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int i;

	++vcpu->stat.host_state_reload;
	kvm_load_ldt(svm->host.ldt);
#ifdef CONFIG_X86_64
	loadsegment(fs, svm->host.fs);
	wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
	load_gs_index(svm->host.gs);
#else
#ifdef CONFIG_X86_32_LAZY_GS
	loadsegment(gs, svm->host.gs);
#endif
#endif
	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
		wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
}

static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
{
	return to_svm(vcpu)->vmcb->save.rflags;
}

static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
	/*
	 * Any change of EFLAGS.VM is accompanied by a reload of SS
	 * (caused by either a task switch or an inter-privilege IRET),
	 * so we do not need to update the CPL here.
1359 */ 1360 to_svm(vcpu)->vmcb->save.rflags = rflags; 1361} 1362 1363static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) 1364{ 1365 switch (reg) { 1366 case VCPU_EXREG_PDPTR: 1367 BUG_ON(!npt_enabled); 1368 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); 1369 break; 1370 default: 1371 BUG(); 1372 } 1373} 1374 1375static void svm_set_vintr(struct vcpu_svm *svm) 1376{ 1377 set_intercept(svm, INTERCEPT_VINTR); 1378} 1379 1380static void svm_clear_vintr(struct vcpu_svm *svm) 1381{ 1382 clr_intercept(svm, INTERCEPT_VINTR); 1383} 1384 1385static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg) 1386{ 1387 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save; 1388 1389 switch (seg) { 1390 case VCPU_SREG_CS: return &save->cs; 1391 case VCPU_SREG_DS: return &save->ds; 1392 case VCPU_SREG_ES: return &save->es; 1393 case VCPU_SREG_FS: return &save->fs; 1394 case VCPU_SREG_GS: return &save->gs; 1395 case VCPU_SREG_SS: return &save->ss; 1396 case VCPU_SREG_TR: return &save->tr; 1397 case VCPU_SREG_LDTR: return &save->ldtr; 1398 } 1399 BUG(); 1400 return NULL; 1401} 1402 1403static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg) 1404{ 1405 struct vmcb_seg *s = svm_seg(vcpu, seg); 1406 1407 return s->base; 1408} 1409 1410static void svm_get_segment(struct kvm_vcpu *vcpu, 1411 struct kvm_segment *var, int seg) 1412{ 1413 struct vmcb_seg *s = svm_seg(vcpu, seg); 1414 1415 var->base = s->base; 1416 var->limit = s->limit; 1417 var->selector = s->selector; 1418 var->type = s->attrib & SVM_SELECTOR_TYPE_MASK; 1419 var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1; 1420 var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3; 1421 var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1; 1422 var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1; 1423 var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; 1424 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; 1425 1426 /* 1427 * AMD CPUs circa 2014 track the G bit for all segments except CS. 1428 * However, the SVM spec states that the G bit is not observed by the 1429 * CPU, and some VMware virtual CPUs drop the G bit for all segments. 1430 * So let's synthesize a legal G bit for all segments, this helps 1431 * running KVM nested. It also helps cross-vendor migration, because 1432 * Intel's vmentry has a check on the 'G' bit. 1433 */ 1434 var->g = s->limit > 0xfffff; 1435 1436 /* 1437 * AMD's VMCB does not have an explicit unusable field, so emulate it 1438 * for cross vendor migration purposes by "not present" 1439 */ 1440 var->unusable = !var->present || (var->type == 0); 1441 1442 switch (seg) { 1443 case VCPU_SREG_TR: 1444 /* 1445 * Work around a bug where the busy flag in the tr selector 1446 * isn't exposed 1447 */ 1448 var->type |= 0x2; 1449 break; 1450 case VCPU_SREG_DS: 1451 case VCPU_SREG_ES: 1452 case VCPU_SREG_FS: 1453 case VCPU_SREG_GS: 1454 /* 1455 * The accessed bit must always be set in the segment 1456 * descriptor cache, although it can be cleared in the 1457 * descriptor, the cached bit always remains at 1. Since 1458 * Intel has a check on this, set it here to support 1459 * cross-vendor migration. 1460 */ 1461 if (!var->unusable) 1462 var->type |= 0x1; 1463 break; 1464 case VCPU_SREG_SS: 1465 /* 1466 * On AMD CPUs sometimes the DB bit in the segment 1467 * descriptor is left as 1, although the whole segment has 1468 * been made unusable. Clear it here to pass an Intel VMX 1469 * entry check when cross vendor migrating. 
1470 */ 1471 if (var->unusable) 1472 var->db = 0; 1473 var->dpl = to_svm(vcpu)->vmcb->save.cpl; 1474 break; 1475 } 1476} 1477 1478static int svm_get_cpl(struct kvm_vcpu *vcpu) 1479{ 1480 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save; 1481 1482 return save->cpl; 1483} 1484 1485static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) 1486{ 1487 struct vcpu_svm *svm = to_svm(vcpu); 1488 1489 dt->size = svm->vmcb->save.idtr.limit; 1490 dt->address = svm->vmcb->save.idtr.base; 1491} 1492 1493static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) 1494{ 1495 struct vcpu_svm *svm = to_svm(vcpu); 1496 1497 svm->vmcb->save.idtr.limit = dt->size; 1498 svm->vmcb->save.idtr.base = dt->address ; 1499 mark_dirty(svm->vmcb, VMCB_DT); 1500} 1501 1502static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) 1503{ 1504 struct vcpu_svm *svm = to_svm(vcpu); 1505 1506 dt->size = svm->vmcb->save.gdtr.limit; 1507 dt->address = svm->vmcb->save.gdtr.base; 1508} 1509 1510static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) 1511{ 1512 struct vcpu_svm *svm = to_svm(vcpu); 1513 1514 svm->vmcb->save.gdtr.limit = dt->size; 1515 svm->vmcb->save.gdtr.base = dt->address ; 1516 mark_dirty(svm->vmcb, VMCB_DT); 1517} 1518 1519static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) 1520{ 1521} 1522 1523static void svm_decache_cr3(struct kvm_vcpu *vcpu) 1524{ 1525} 1526 1527static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) 1528{ 1529} 1530 1531static void update_cr0_intercept(struct vcpu_svm *svm) 1532{ 1533 ulong gcr0 = svm->vcpu.arch.cr0; 1534 u64 *hcr0 = &svm->vmcb->save.cr0; 1535 1536 if (!svm->vcpu.fpu_active) 1537 *hcr0 |= SVM_CR0_SELECTIVE_MASK; 1538 else 1539 *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK) 1540 | (gcr0 & SVM_CR0_SELECTIVE_MASK); 1541 1542 mark_dirty(svm->vmcb, VMCB_CR); 1543 1544 if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { 1545 clr_cr_intercept(svm, INTERCEPT_CR0_READ); 1546 clr_cr_intercept(svm, INTERCEPT_CR0_WRITE); 1547 } else { 1548 set_cr_intercept(svm, INTERCEPT_CR0_READ); 1549 set_cr_intercept(svm, INTERCEPT_CR0_WRITE); 1550 } 1551} 1552 1553static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 1554{ 1555 struct vcpu_svm *svm = to_svm(vcpu); 1556 1557#ifdef CONFIG_X86_64 1558 if (vcpu->arch.efer & EFER_LME) { 1559 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { 1560 vcpu->arch.efer |= EFER_LMA; 1561 svm->vmcb->save.efer |= EFER_LMA | EFER_LME; 1562 } 1563 1564 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) { 1565 vcpu->arch.efer &= ~EFER_LMA; 1566 svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME); 1567 } 1568 } 1569#endif 1570 vcpu->arch.cr0 = cr0; 1571 1572 if (!npt_enabled) 1573 cr0 |= X86_CR0_PG | X86_CR0_WP; 1574 1575 if (!vcpu->fpu_active) 1576 cr0 |= X86_CR0_TS; 1577 /* 1578 * re-enable caching here because the QEMU bios 1579 * does not do it - this results in some delay at 1580 * reboot 1581 */ 1582 cr0 &= ~(X86_CR0_CD | X86_CR0_NW); 1583 svm->vmcb->save.cr0 = cr0; 1584 mark_dirty(svm->vmcb, VMCB_CR); 1585 update_cr0_intercept(svm); 1586} 1587 1588static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 1589{ 1590 unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE; 1591 unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4; 1592 1593 if (cr4 & X86_CR4_VMXE) 1594 return 1; 1595 1596 if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) 1597 svm_flush_tlb(vcpu); 1598 1599 vcpu->arch.cr4 = cr4; 1600 if (!npt_enabled) 1601 cr4 |= X86_CR4_PAE; 1602 cr4 |= host_cr4_mce; 1603 to_svm(vcpu)->vmcb->save.cr4 = 
cr4; 1604 mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); 1605 return 0; 1606} 1607 1608static void svm_set_segment(struct kvm_vcpu *vcpu, 1609 struct kvm_segment *var, int seg) 1610{ 1611 struct vcpu_svm *svm = to_svm(vcpu); 1612 struct vmcb_seg *s = svm_seg(vcpu, seg); 1613 1614 s->base = var->base; 1615 s->limit = var->limit; 1616 s->selector = var->selector; 1617 if (var->unusable) 1618 s->attrib = 0; 1619 else { 1620 s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK); 1621 s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT; 1622 s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT; 1623 s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT; 1624 s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT; 1625 s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT; 1626 s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT; 1627 s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; 1628 } 1629 1630 /* 1631 * This is always accurate, except if SYSRET returned to a segment 1632 * with SS.DPL != 3. Intel does not have this quirk, and always 1633 * forces SS.DPL to 3 on sysret, so we ignore that case; fixing it 1634 * would entail passing the CPL to userspace and back. 1635 */ 1636 if (seg == VCPU_SREG_SS) 1637 svm->vmcb->save.cpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3; 1638 1639 mark_dirty(svm->vmcb, VMCB_SEG); 1640} 1641 1642static void update_bp_intercept(struct kvm_vcpu *vcpu) 1643{ 1644 struct vcpu_svm *svm = to_svm(vcpu); 1645 1646 clr_exception_intercept(svm, BP_VECTOR); 1647 1648 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { 1649 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) 1650 set_exception_intercept(svm, BP_VECTOR); 1651 } else 1652 vcpu->guest_debug = 0; 1653} 1654 1655static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) 1656{ 1657 if (sd->next_asid > sd->max_asid) { 1658 ++sd->asid_generation; 1659 sd->next_asid = 1; 1660 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; 1661 } 1662 1663 svm->asid_generation = sd->asid_generation; 1664 svm->vmcb->control.asid = sd->next_asid++; 1665 1666 mark_dirty(svm->vmcb, VMCB_ASID); 1667} 1668 1669static u64 svm_get_dr6(struct kvm_vcpu *vcpu) 1670{ 1671 return to_svm(vcpu)->vmcb->save.dr6; 1672} 1673 1674static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value) 1675{ 1676 struct vcpu_svm *svm = to_svm(vcpu); 1677 1678 svm->vmcb->save.dr6 = value; 1679 mark_dirty(svm->vmcb, VMCB_DR); 1680} 1681 1682static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) 1683{ 1684 struct vcpu_svm *svm = to_svm(vcpu); 1685 1686 get_debugreg(vcpu->arch.db[0], 0); 1687 get_debugreg(vcpu->arch.db[1], 1); 1688 get_debugreg(vcpu->arch.db[2], 2); 1689 get_debugreg(vcpu->arch.db[3], 3); 1690 vcpu->arch.dr6 = svm_get_dr6(vcpu); 1691 vcpu->arch.dr7 = svm->vmcb->save.dr7; 1692 1693 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; 1694 set_dr_intercepts(svm); 1695} 1696 1697static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) 1698{ 1699 struct vcpu_svm *svm = to_svm(vcpu); 1700 1701 svm->vmcb->save.dr7 = value; 1702 mark_dirty(svm->vmcb, VMCB_DR); 1703} 1704 1705static int pf_interception(struct vcpu_svm *svm) 1706{ 1707 u64 fault_address = svm->vmcb->control.exit_info_2; 1708 u32 error_code; 1709 int r = 1; 1710 1711 switch (svm->apf_reason) { 1712 default: 1713 error_code = svm->vmcb->control.exit_info_1; 1714 1715 trace_kvm_page_fault(fault_address, error_code); 1716 if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu)) 1717 kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); 1718 r = 
kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code, 1719 svm->vmcb->control.insn_bytes, 1720 svm->vmcb->control.insn_len); 1721 break; 1722 case KVM_PV_REASON_PAGE_NOT_PRESENT: 1723 svm->apf_reason = 0; 1724 local_irq_disable(); 1725 kvm_async_pf_task_wait(fault_address); 1726 local_irq_enable(); 1727 break; 1728 case KVM_PV_REASON_PAGE_READY: 1729 svm->apf_reason = 0; 1730 local_irq_disable(); 1731 kvm_async_pf_task_wake(fault_address); 1732 local_irq_enable(); 1733 break; 1734 } 1735 return r; 1736} 1737 1738static int db_interception(struct vcpu_svm *svm) 1739{ 1740 struct kvm_run *kvm_run = svm->vcpu.run; 1741 1742 if (!(svm->vcpu.guest_debug & 1743 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && 1744 !svm->nmi_singlestep) { 1745 kvm_queue_exception(&svm->vcpu, DB_VECTOR); 1746 return 1; 1747 } 1748 1749 if (svm->nmi_singlestep) { 1750 svm->nmi_singlestep = false; 1751 if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) 1752 svm->vmcb->save.rflags &= 1753 ~(X86_EFLAGS_TF | X86_EFLAGS_RF); 1754 } 1755 1756 if (svm->vcpu.guest_debug & 1757 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) { 1758 kvm_run->exit_reason = KVM_EXIT_DEBUG; 1759 kvm_run->debug.arch.pc = 1760 svm->vmcb->save.cs.base + svm->vmcb->save.rip; 1761 kvm_run->debug.arch.exception = DB_VECTOR; 1762 return 0; 1763 } 1764 1765 return 1; 1766} 1767 1768static int bp_interception(struct vcpu_svm *svm) 1769{ 1770 struct kvm_run *kvm_run = svm->vcpu.run; 1771 1772 kvm_run->exit_reason = KVM_EXIT_DEBUG; 1773 kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; 1774 kvm_run->debug.arch.exception = BP_VECTOR; 1775 return 0; 1776} 1777 1778static int ud_interception(struct vcpu_svm *svm) 1779{ 1780 int er; 1781 1782 er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); 1783 if (er != EMULATE_DONE) 1784 kvm_queue_exception(&svm->vcpu, UD_VECTOR); 1785 return 1; 1786} 1787 1788static int ac_interception(struct vcpu_svm *svm) 1789{ 1790 kvm_queue_exception_e(&svm->vcpu, AC_VECTOR, 0); 1791 return 1; 1792} 1793 1794static void svm_fpu_activate(struct kvm_vcpu *vcpu) 1795{ 1796 struct vcpu_svm *svm = to_svm(vcpu); 1797 1798 clr_exception_intercept(svm, NM_VECTOR); 1799 1800 svm->vcpu.fpu_active = 1; 1801 update_cr0_intercept(svm); 1802} 1803 1804static int nm_interception(struct vcpu_svm *svm) 1805{ 1806 svm_fpu_activate(&svm->vcpu); 1807 return 1; 1808} 1809 1810static bool is_erratum_383(void) 1811{ 1812 int err, i; 1813 u64 value; 1814 1815 if (!erratum_383_found) 1816 return false; 1817 1818 value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err); 1819 if (err) 1820 return false; 1821 1822 /* Bit 62 may or may not be set for this mce */ 1823 value &= ~(1ULL << 62); 1824 1825 if (value != 0xb600000000010015ULL) 1826 return false; 1827 1828 /* Clear MCi_STATUS registers */ 1829 for (i = 0; i < 6; ++i) 1830 native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0); 1831 1832 value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err); 1833 if (!err) { 1834 u32 low, high; 1835 1836 value &= ~(1ULL << 2); 1837 low = lower_32_bits(value); 1838 high = upper_32_bits(value); 1839 1840 native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high); 1841 } 1842 1843 /* Flush tlb to evict multi-match entries */ 1844 __flush_tlb_all(); 1845 1846 return true; 1847} 1848 1849static void svm_handle_mce(struct vcpu_svm *svm) 1850{ 1851 if (is_erratum_383()) { 1852 /* 1853 * Erratum 383 triggered. Guest state is corrupt so kill the 1854 * guest. 
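		 * (Erratum 383 is the AMD TLB multi-match condition that
		 * svm_init_erratum_383() arms the MSR workaround for and
		 * that is_erratum_383() detects from the MC0 status signature.)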
1855 */ 1856 pr_err("KVM: Guest triggered AMD Erratum 383\n"); 1857 1858 kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu); 1859 1860 return; 1861 } 1862 1863 /* 1864 * On an #MC intercept the MCE handler is not called automatically in 1865 * the host. So do it by hand here. 1866 */ 1867 asm volatile ( 1868 "int $0x12\n"); 1869 /* not sure if we ever come back to this point */ 1870 1871 return; 1872} 1873 1874static int mc_interception(struct vcpu_svm *svm) 1875{ 1876 return 1; 1877} 1878 1879static int shutdown_interception(struct vcpu_svm *svm) 1880{ 1881 struct kvm_run *kvm_run = svm->vcpu.run; 1882 1883 /* 1884 * VMCB is undefined after a SHUTDOWN intercept 1885 * so reinitialize it. 1886 */ 1887 clear_page(svm->vmcb); 1888 init_vmcb(svm); 1889 1890 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; 1891 return 0; 1892} 1893 1894static int io_interception(struct vcpu_svm *svm) 1895{ 1896 struct kvm_vcpu *vcpu = &svm->vcpu; 1897 u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ 1898 int size, in, string; 1899 unsigned port; 1900 1901 ++svm->vcpu.stat.io_exits; 1902 string = (io_info & SVM_IOIO_STR_MASK) != 0; 1903 in = (io_info & SVM_IOIO_TYPE_MASK) != 0; 1904 if (string || in) 1905 return emulate_instruction(vcpu, 0) == EMULATE_DONE; 1906 1907 port = io_info >> 16; 1908 size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; 1909 svm->next_rip = svm->vmcb->control.exit_info_2; 1910 skip_emulated_instruction(&svm->vcpu); 1911 1912 return kvm_fast_pio_out(vcpu, size, port); 1913} 1914 1915static int nmi_interception(struct vcpu_svm *svm) 1916{ 1917 return 1; 1918} 1919 1920static int intr_interception(struct vcpu_svm *svm) 1921{ 1922 ++svm->vcpu.stat.irq_exits; 1923 return 1; 1924} 1925 1926static int nop_on_interception(struct vcpu_svm *svm) 1927{ 1928 return 1; 1929} 1930 1931static int halt_interception(struct vcpu_svm *svm) 1932{ 1933 svm->next_rip = kvm_rip_read(&svm->vcpu) + 1; 1934 return kvm_emulate_halt(&svm->vcpu); 1935} 1936 1937static int vmmcall_interception(struct vcpu_svm *svm) 1938{ 1939 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 1940 kvm_emulate_hypercall(&svm->vcpu); 1941 return 1; 1942} 1943 1944static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) 1945{ 1946 struct vcpu_svm *svm = to_svm(vcpu); 1947 1948 return svm->nested.nested_cr3; 1949} 1950 1951static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index) 1952{ 1953 struct vcpu_svm *svm = to_svm(vcpu); 1954 u64 cr3 = svm->nested.nested_cr3; 1955 u64 pdpte; 1956 int ret; 1957 1958 ret = kvm_read_guest_page(vcpu->kvm, gpa_to_gfn(cr3), &pdpte, 1959 offset_in_page(cr3) + index * 8, 8); 1960 if (ret) 1961 return 0; 1962 return pdpte; 1963} 1964 1965static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu, 1966 unsigned long root) 1967{ 1968 struct vcpu_svm *svm = to_svm(vcpu); 1969 1970 svm->vmcb->control.nested_cr3 = root; 1971 mark_dirty(svm->vmcb, VMCB_NPT); 1972 svm_flush_tlb(vcpu); 1973} 1974 1975static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, 1976 struct x86_exception *fault) 1977{ 1978 struct vcpu_svm *svm = to_svm(vcpu); 1979 1980 if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) { 1981 /* 1982 * TODO: track the cause of the nested page fault, and 1983 * correctly fill in the high bits of exit_info_1. 
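		 * The low 32 bits of exit_info_1 do carry the page-fault error
		 * code (filled in further down); it is only the additional NPF
		 * detail in bit 32 and above that is not reconstructed here.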
1984 */ 1985 svm->vmcb->control.exit_code = SVM_EXIT_NPF; 1986 svm->vmcb->control.exit_code_hi = 0; 1987 svm->vmcb->control.exit_info_1 = (1ULL << 32); 1988 svm->vmcb->control.exit_info_2 = fault->address; 1989 } 1990 1991 svm->vmcb->control.exit_info_1 &= ~0xffffffffULL; 1992 svm->vmcb->control.exit_info_1 |= fault->error_code; 1993 1994 /* 1995 * The present bit is always zero for page structure faults on real 1996 * hardware. 1997 */ 1998 if (svm->vmcb->control.exit_info_1 & (2ULL << 32)) 1999 svm->vmcb->control.exit_info_1 &= ~1; 2000 2001 nested_svm_vmexit(svm); 2002} 2003 2004static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) 2005{ 2006 WARN_ON(mmu_is_nested(vcpu)); 2007 kvm_init_shadow_mmu(vcpu); 2008 vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3; 2009 vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; 2010 vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr; 2011 vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; 2012 vcpu->arch.mmu.shadow_root_level = get_npt_level(); 2013 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; 2014} 2015 2016static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu) 2017{ 2018 vcpu->arch.walk_mmu = &vcpu->arch.mmu; 2019} 2020 2021static int nested_svm_check_permissions(struct vcpu_svm *svm) 2022{ 2023 if (!(svm->vcpu.arch.efer & EFER_SVME) 2024 || !is_paging(&svm->vcpu)) { 2025 kvm_queue_exception(&svm->vcpu, UD_VECTOR); 2026 return 1; 2027 } 2028 2029 if (svm->vmcb->save.cpl) { 2030 kvm_inject_gp(&svm->vcpu, 0); 2031 return 1; 2032 } 2033 2034 return 0; 2035} 2036 2037static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, 2038 bool has_error_code, u32 error_code) 2039{ 2040 int vmexit; 2041 2042 if (!is_guest_mode(&svm->vcpu)) 2043 return 0; 2044 2045 svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; 2046 svm->vmcb->control.exit_code_hi = 0; 2047 svm->vmcb->control.exit_info_1 = error_code; 2048 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; 2049 2050 vmexit = nested_svm_intercept(svm); 2051 if (vmexit == NESTED_EXIT_DONE) 2052 svm->nested.exit_required = true; 2053 2054 return vmexit; 2055} 2056 2057/* This function returns true if it is save to enable the irq window */ 2058static inline bool nested_svm_intr(struct vcpu_svm *svm) 2059{ 2060 if (!is_guest_mode(&svm->vcpu)) 2061 return true; 2062 2063 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) 2064 return true; 2065 2066 if (!(svm->vcpu.arch.hflags & HF_HIF_MASK)) 2067 return false; 2068 2069 /* 2070 * if vmexit was already requested (by intercepted exception 2071 * for instance) do not overwrite it with "external interrupt" 2072 * vmexit. 2073 */ 2074 if (svm->nested.exit_required) 2075 return false; 2076 2077 svm->vmcb->control.exit_code = SVM_EXIT_INTR; 2078 svm->vmcb->control.exit_info_1 = 0; 2079 svm->vmcb->control.exit_info_2 = 0; 2080 2081 if (svm->nested.intercept & 1ULL) { 2082 /* 2083 * The #vmexit can't be emulated here directly because this 2084 * code path runs with irqs and preemption disabled. A 2085 * #vmexit emulation might sleep. Only signal request for 2086 * the #vmexit here. 
2087 */ 2088 svm->nested.exit_required = true; 2089 trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); 2090 return false; 2091 } 2092 2093 return true; 2094} 2095 2096/* This function returns true if it is save to enable the nmi window */ 2097static inline bool nested_svm_nmi(struct vcpu_svm *svm) 2098{ 2099 if (!is_guest_mode(&svm->vcpu)) 2100 return true; 2101 2102 if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI))) 2103 return true; 2104 2105 svm->vmcb->control.exit_code = SVM_EXIT_NMI; 2106 svm->nested.exit_required = true; 2107 2108 return false; 2109} 2110 2111static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page) 2112{ 2113 struct page *page; 2114 2115 might_sleep(); 2116 2117 page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); 2118 if (is_error_page(page)) 2119 goto error; 2120 2121 *_page = page; 2122 2123 return kmap(page); 2124 2125error: 2126 kvm_inject_gp(&svm->vcpu, 0); 2127 2128 return NULL; 2129} 2130 2131static void nested_svm_unmap(struct page *page) 2132{ 2133 kunmap(page); 2134 kvm_release_page_dirty(page); 2135} 2136 2137static int nested_svm_intercept_ioio(struct vcpu_svm *svm) 2138{ 2139 unsigned port, size, iopm_len; 2140 u16 val, mask; 2141 u8 start_bit; 2142 u64 gpa; 2143 2144 if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT))) 2145 return NESTED_EXIT_HOST; 2146 2147 port = svm->vmcb->control.exit_info_1 >> 16; 2148 size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >> 2149 SVM_IOIO_SIZE_SHIFT; 2150 gpa = svm->nested.vmcb_iopm + (port / 8); 2151 start_bit = port % 8; 2152 iopm_len = (start_bit + size > 8) ? 2 : 1; 2153 mask = (0xf >> (4 - size)) << start_bit; 2154 val = 0; 2155 2156 if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, iopm_len)) 2157 return NESTED_EXIT_DONE; 2158 2159 return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; 2160} 2161 2162static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) 2163{ 2164 u32 offset, msr, value; 2165 int write, mask; 2166 2167 if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) 2168 return NESTED_EXIT_HOST; 2169 2170 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; 2171 offset = svm_msrpm_offset(msr); 2172 write = svm->vmcb->control.exit_info_1 & 1; 2173 mask = 1 << ((2 * (msr & 0xf)) + write); 2174 2175 if (offset == MSR_INVALID) 2176 return NESTED_EXIT_DONE; 2177 2178 /* Offset is in 32 bit units but need in 8 bit units */ 2179 offset *= 4; 2180 2181 if (kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + offset, &value, 4)) 2182 return NESTED_EXIT_DONE; 2183 2184 return (value & mask) ? 
NESTED_EXIT_DONE : NESTED_EXIT_HOST; 2185} 2186 2187static int nested_svm_exit_special(struct vcpu_svm *svm) 2188{ 2189 u32 exit_code = svm->vmcb->control.exit_code; 2190 2191 switch (exit_code) { 2192 case SVM_EXIT_INTR: 2193 case SVM_EXIT_NMI: 2194 case SVM_EXIT_EXCP_BASE + MC_VECTOR: 2195 return NESTED_EXIT_HOST; 2196 case SVM_EXIT_NPF: 2197 /* For now we are always handling NPFs when using them */ 2198 if (npt_enabled) 2199 return NESTED_EXIT_HOST; 2200 break; 2201 case SVM_EXIT_EXCP_BASE + PF_VECTOR: 2202 /* When we're shadowing, trap PFs, but not async PF */ 2203 if (!npt_enabled && svm->apf_reason == 0) 2204 return NESTED_EXIT_HOST; 2205 break; 2206 case SVM_EXIT_EXCP_BASE + NM_VECTOR: 2207 nm_interception(svm); 2208 break; 2209 default: 2210 break; 2211 } 2212 2213 return NESTED_EXIT_CONTINUE; 2214} 2215 2216/* 2217 * If this function returns true, this #vmexit was already handled 2218 */ 2219static int nested_svm_intercept(struct vcpu_svm *svm) 2220{ 2221 u32 exit_code = svm->vmcb->control.exit_code; 2222 int vmexit = NESTED_EXIT_HOST; 2223 2224 switch (exit_code) { 2225 case SVM_EXIT_MSR: 2226 vmexit = nested_svm_exit_handled_msr(svm); 2227 break; 2228 case SVM_EXIT_IOIO: 2229 vmexit = nested_svm_intercept_ioio(svm); 2230 break; 2231 case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: { 2232 u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0); 2233 if (svm->nested.intercept_cr & bit) 2234 vmexit = NESTED_EXIT_DONE; 2235 break; 2236 } 2237 case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: { 2238 u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0); 2239 if (svm->nested.intercept_dr & bit) 2240 vmexit = NESTED_EXIT_DONE; 2241 break; 2242 } 2243 case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: { 2244 u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); 2245 if (svm->nested.intercept_exceptions & excp_bits) 2246 vmexit = NESTED_EXIT_DONE; 2247 /* async page fault always cause vmexit */ 2248 else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) && 2249 svm->apf_reason != 0) 2250 vmexit = NESTED_EXIT_DONE; 2251 break; 2252 } 2253 case SVM_EXIT_ERR: { 2254 vmexit = NESTED_EXIT_DONE; 2255 break; 2256 } 2257 default: { 2258 u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); 2259 if (svm->nested.intercept & exit_bits) 2260 vmexit = NESTED_EXIT_DONE; 2261 } 2262 } 2263 2264 return vmexit; 2265} 2266 2267static int nested_svm_exit_handled(struct vcpu_svm *svm) 2268{ 2269 int vmexit; 2270 2271 vmexit = nested_svm_intercept(svm); 2272 2273 if (vmexit == NESTED_EXIT_DONE) 2274 nested_svm_vmexit(svm); 2275 2276 return vmexit; 2277} 2278 2279static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb) 2280{ 2281 struct vmcb_control_area *dst = &dst_vmcb->control; 2282 struct vmcb_control_area *from = &from_vmcb->control; 2283 2284 dst->intercept_cr = from->intercept_cr; 2285 dst->intercept_dr = from->intercept_dr; 2286 dst->intercept_exceptions = from->intercept_exceptions; 2287 dst->intercept = from->intercept; 2288 dst->iopm_base_pa = from->iopm_base_pa; 2289 dst->msrpm_base_pa = from->msrpm_base_pa; 2290 dst->tsc_offset = from->tsc_offset; 2291 dst->asid = from->asid; 2292 dst->tlb_ctl = from->tlb_ctl; 2293 dst->int_ctl = from->int_ctl; 2294 dst->int_vector = from->int_vector; 2295 dst->int_state = from->int_state; 2296 dst->exit_code = from->exit_code; 2297 dst->exit_code_hi = from->exit_code_hi; 2298 dst->exit_info_1 = from->exit_info_1; 2299 dst->exit_info_2 = from->exit_info_2; 2300 dst->exit_int_info = from->exit_int_info; 2301 dst->exit_int_info_err = 
from->exit_int_info_err; 2302 dst->nested_ctl = from->nested_ctl; 2303 dst->event_inj = from->event_inj; 2304 dst->event_inj_err = from->event_inj_err; 2305 dst->nested_cr3 = from->nested_cr3; 2306 dst->lbr_ctl = from->lbr_ctl; 2307} 2308 2309static int nested_svm_vmexit(struct vcpu_svm *svm) 2310{ 2311 struct vmcb *nested_vmcb; 2312 struct vmcb *hsave = svm->nested.hsave; 2313 struct vmcb *vmcb = svm->vmcb; 2314 struct page *page; 2315 2316 trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, 2317 vmcb->control.exit_info_1, 2318 vmcb->control.exit_info_2, 2319 vmcb->control.exit_int_info, 2320 vmcb->control.exit_int_info_err, 2321 KVM_ISA_SVM); 2322 2323 nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page); 2324 if (!nested_vmcb) 2325 return 1; 2326 2327 /* Exit Guest-Mode */ 2328 leave_guest_mode(&svm->vcpu); 2329 svm->nested.vmcb = 0; 2330 2331 /* Give the current vmcb to the guest */ 2332 disable_gif(svm); 2333 2334 nested_vmcb->save.es = vmcb->save.es; 2335 nested_vmcb->save.cs = vmcb->save.cs; 2336 nested_vmcb->save.ss = vmcb->save.ss; 2337 nested_vmcb->save.ds = vmcb->save.ds; 2338 nested_vmcb->save.gdtr = vmcb->save.gdtr; 2339 nested_vmcb->save.idtr = vmcb->save.idtr; 2340 nested_vmcb->save.efer = svm->vcpu.arch.efer; 2341 nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu); 2342 nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu); 2343 nested_vmcb->save.cr2 = vmcb->save.cr2; 2344 nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; 2345 nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu); 2346 nested_vmcb->save.rip = vmcb->save.rip; 2347 nested_vmcb->save.rsp = vmcb->save.rsp; 2348 nested_vmcb->save.rax = vmcb->save.rax; 2349 nested_vmcb->save.dr7 = vmcb->save.dr7; 2350 nested_vmcb->save.dr6 = vmcb->save.dr6; 2351 nested_vmcb->save.cpl = vmcb->save.cpl; 2352 2353 nested_vmcb->control.int_ctl = vmcb->control.int_ctl; 2354 nested_vmcb->control.int_vector = vmcb->control.int_vector; 2355 nested_vmcb->control.int_state = vmcb->control.int_state; 2356 nested_vmcb->control.exit_code = vmcb->control.exit_code; 2357 nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi; 2358 nested_vmcb->control.exit_info_1 = vmcb->control.exit_info_1; 2359 nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2; 2360 nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info; 2361 nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err; 2362 nested_vmcb->control.next_rip = vmcb->control.next_rip; 2363 2364 /* 2365 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have 2366 * to make sure that we do not lose injected events. So check event_inj 2367 * here and copy it to exit_int_info if it is valid. 2368 * Exit_int_info and event_inj can't be both valid because the case 2369 * below only happens on a VMRUN instruction intercept which has 2370 * no valid exit_int_info set. 
2371 */ 2372 if (vmcb->control.event_inj & SVM_EVTINJ_VALID) { 2373 struct vmcb_control_area *nc = &nested_vmcb->control; 2374 2375 nc->exit_int_info = vmcb->control.event_inj; 2376 nc->exit_int_info_err = vmcb->control.event_inj_err; 2377 } 2378 2379 nested_vmcb->control.tlb_ctl = 0; 2380 nested_vmcb->control.event_inj = 0; 2381 nested_vmcb->control.event_inj_err = 0; 2382 2383 /* We always set V_INTR_MASKING and remember the old value in hflags */ 2384 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) 2385 nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK; 2386 2387 /* Restore the original control entries */ 2388 copy_vmcb_control_area(vmcb, hsave); 2389 2390 kvm_clear_exception_queue(&svm->vcpu); 2391 kvm_clear_interrupt_queue(&svm->vcpu); 2392 2393 svm->nested.nested_cr3 = 0; 2394 2395 /* Restore selected save entries */ 2396 svm->vmcb->save.es = hsave->save.es; 2397 svm->vmcb->save.cs = hsave->save.cs; 2398 svm->vmcb->save.ss = hsave->save.ss; 2399 svm->vmcb->save.ds = hsave->save.ds; 2400 svm->vmcb->save.gdtr = hsave->save.gdtr; 2401 svm->vmcb->save.idtr = hsave->save.idtr; 2402 kvm_set_rflags(&svm->vcpu, hsave->save.rflags); 2403 svm_set_efer(&svm->vcpu, hsave->save.efer); 2404 svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); 2405 svm_set_cr4(&svm->vcpu, hsave->save.cr4); 2406 if (npt_enabled) { 2407 svm->vmcb->save.cr3 = hsave->save.cr3; 2408 svm->vcpu.arch.cr3 = hsave->save.cr3; 2409 } else { 2410 (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3); 2411 } 2412 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax); 2413 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp); 2414 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip); 2415 svm->vmcb->save.dr7 = 0; 2416 svm->vmcb->save.cpl = 0; 2417 svm->vmcb->control.exit_int_info = 0; 2418 2419 mark_all_dirty(svm->vmcb); 2420 2421 nested_svm_unmap(page); 2422 2423 nested_svm_uninit_mmu_context(&svm->vcpu); 2424 kvm_mmu_reset_context(&svm->vcpu); 2425 kvm_mmu_load(&svm->vcpu); 2426 2427 return 0; 2428} 2429 2430static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) 2431{ 2432 /* 2433 * This function merges the msr permission bitmaps of kvm and the 2434 * nested vmcb. 
It is optimized in that it only merges the parts where 2435 * the kvm msr permission bitmap may contain zero bits 2436 */ 2437 int i; 2438 2439 if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) 2440 return true; 2441 2442 for (i = 0; i < MSRPM_OFFSETS; i++) { 2443 u32 value, p; 2444 u64 offset; 2445 2446 if (msrpm_offsets[i] == 0xffffffff) 2447 break; 2448 2449 p = msrpm_offsets[i]; 2450 offset = svm->nested.vmcb_msrpm + (p * 4); 2451 2452 if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4)) 2453 return false; 2454 2455 svm->nested.msrpm[p] = svm->msrpm[p] | value; 2456 } 2457 2458 svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm); 2459 2460 return true; 2461} 2462 2463static bool nested_vmcb_checks(struct vmcb *vmcb) 2464{ 2465 if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0) 2466 return false; 2467 2468 if (vmcb->control.asid == 0) 2469 return false; 2470 2471 if (vmcb->control.nested_ctl && !npt_enabled) 2472 return false; 2473 2474 return true; 2475} 2476 2477static bool nested_svm_vmrun(struct vcpu_svm *svm) 2478{ 2479 struct vmcb *nested_vmcb; 2480 struct vmcb *hsave = svm->nested.hsave; 2481 struct vmcb *vmcb = svm->vmcb; 2482 struct page *page; 2483 u64 vmcb_gpa; 2484 2485 vmcb_gpa = svm->vmcb->save.rax; 2486 2487 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); 2488 if (!nested_vmcb) 2489 return false; 2490 2491 if (!nested_vmcb_checks(nested_vmcb)) { 2492 nested_vmcb->control.exit_code = SVM_EXIT_ERR; 2493 nested_vmcb->control.exit_code_hi = 0; 2494 nested_vmcb->control.exit_info_1 = 0; 2495 nested_vmcb->control.exit_info_2 = 0; 2496 2497 nested_svm_unmap(page); 2498 2499 return false; 2500 } 2501 2502 trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa, 2503 nested_vmcb->save.rip, 2504 nested_vmcb->control.int_ctl, 2505 nested_vmcb->control.event_inj, 2506 nested_vmcb->control.nested_ctl); 2507 2508 trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff, 2509 nested_vmcb->control.intercept_cr >> 16, 2510 nested_vmcb->control.intercept_exceptions, 2511 nested_vmcb->control.intercept); 2512 2513 /* Clear internal status */ 2514 kvm_clear_exception_queue(&svm->vcpu); 2515 kvm_clear_interrupt_queue(&svm->vcpu); 2516 2517 /* 2518 * Save the old vmcb, so we don't need to pick what we save, but can 2519 * restore everything when a VMEXIT occurs 2520 */ 2521 hsave->save.es = vmcb->save.es; 2522 hsave->save.cs = vmcb->save.cs; 2523 hsave->save.ss = vmcb->save.ss; 2524 hsave->save.ds = vmcb->save.ds; 2525 hsave->save.gdtr = vmcb->save.gdtr; 2526 hsave->save.idtr = vmcb->save.idtr; 2527 hsave->save.efer = svm->vcpu.arch.efer; 2528 hsave->save.cr0 = kvm_read_cr0(&svm->vcpu); 2529 hsave->save.cr4 = svm->vcpu.arch.cr4; 2530 hsave->save.rflags = kvm_get_rflags(&svm->vcpu); 2531 hsave->save.rip = kvm_rip_read(&svm->vcpu); 2532 hsave->save.rsp = vmcb->save.rsp; 2533 hsave->save.rax = vmcb->save.rax; 2534 if (npt_enabled) 2535 hsave->save.cr3 = vmcb->save.cr3; 2536 else 2537 hsave->save.cr3 = kvm_read_cr3(&svm->vcpu); 2538 2539 copy_vmcb_control_area(hsave, vmcb); 2540 2541 if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF) 2542 svm->vcpu.arch.hflags |= HF_HIF_MASK; 2543 else 2544 svm->vcpu.arch.hflags &= ~HF_HIF_MASK; 2545 2546 if (nested_vmcb->control.nested_ctl) { 2547 kvm_mmu_unload(&svm->vcpu); 2548 svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3; 2549 nested_svm_init_mmu_context(&svm->vcpu); 2550 } 2551 2552 /* Load the nested guest state */ 2553 svm->vmcb->save.es = nested_vmcb->save.es; 2554 svm->vmcb->save.cs = 
nested_vmcb->save.cs;
2555 svm->vmcb->save.ss = nested_vmcb->save.ss;
2556 svm->vmcb->save.ds = nested_vmcb->save.ds;
2557 svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
2558 svm->vmcb->save.idtr = nested_vmcb->save.idtr;
2559 kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
2560 svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
2561 svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
2562 svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
2563 if (npt_enabled) {
2564 svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
2565 svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
2566 } else
2567 (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
2568
2569 /* Guest paging mode is active - reset mmu */
2570 kvm_mmu_reset_context(&svm->vcpu);
2571
2572 svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
2573 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
2574 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
2575 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
2576
2577 /* In case we don't even reach vcpu_run, the fields are not updated */
2578 svm->vmcb->save.rax = nested_vmcb->save.rax;
2579 svm->vmcb->save.rsp = nested_vmcb->save.rsp;
2580 svm->vmcb->save.rip = nested_vmcb->save.rip;
2581 svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
2582 svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
2583 svm->vmcb->save.cpl = nested_vmcb->save.cpl;
2584
2585 svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
2586 svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL;
2587
2588 /* cache intercepts */
2589 svm->nested.intercept_cr = nested_vmcb->control.intercept_cr;
2590 svm->nested.intercept_dr = nested_vmcb->control.intercept_dr;
2591 svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
2592 svm->nested.intercept = nested_vmcb->control.intercept;
2593
2594 svm_flush_tlb(&svm->vcpu);
2595 svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
2596 if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
2597 svm->vcpu.arch.hflags |= HF_VINTR_MASK;
2598 else
2599 svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
2600
2601 if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
2602 /* We only want the cr8 intercept bits of the guest */
2603 clr_cr_intercept(svm, INTERCEPT_CR8_READ);
2604 clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
2605 }
2606
2607 /* We don't want to see VMMCALLs from a nested guest */
2608 clr_intercept(svm, INTERCEPT_VMMCALL);
2609
2610 svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl;
2611 svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
2612 svm->vmcb->control.int_state = nested_vmcb->control.int_state;
2613 svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
2614 svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
2615 svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
2616
2617 nested_svm_unmap(page);
2618
2619 /* Enter Guest-Mode */
2620 enter_guest_mode(&svm->vcpu);
2621
2622 /*
2623 * Merge guest and host intercepts - must be called with vcpu in
2624 * guest-mode to take effect here
2625 */
2626 recalc_intercepts(svm);
2627
2628 svm->nested.vmcb = vmcb_gpa;
2629
2630 enable_gif(svm);
2631
2632 mark_all_dirty(svm->vmcb);
2633
2634 return true;
2635}
2636
2637static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
2638{
2639 to_vmcb->save.fs = from_vmcb->save.fs;
2640 to_vmcb->save.gs = from_vmcb->save.gs;
2641 to_vmcb->save.tr = from_vmcb->save.tr;
2642
to_vmcb->save.ldtr = from_vmcb->save.ldtr; 2643 to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base; 2644 to_vmcb->save.star = from_vmcb->save.star; 2645 to_vmcb->save.lstar = from_vmcb->save.lstar; 2646 to_vmcb->save.cstar = from_vmcb->save.cstar; 2647 to_vmcb->save.sfmask = from_vmcb->save.sfmask; 2648 to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs; 2649 to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp; 2650 to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip; 2651} 2652 2653static int vmload_interception(struct vcpu_svm *svm) 2654{ 2655 struct vmcb *nested_vmcb; 2656 struct page *page; 2657 2658 if (nested_svm_check_permissions(svm)) 2659 return 1; 2660 2661 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); 2662 if (!nested_vmcb) 2663 return 1; 2664 2665 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 2666 skip_emulated_instruction(&svm->vcpu); 2667 2668 nested_svm_vmloadsave(nested_vmcb, svm->vmcb); 2669 nested_svm_unmap(page); 2670 2671 return 1; 2672} 2673 2674static int vmsave_interception(struct vcpu_svm *svm) 2675{ 2676 struct vmcb *nested_vmcb; 2677 struct page *page; 2678 2679 if (nested_svm_check_permissions(svm)) 2680 return 1; 2681 2682 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); 2683 if (!nested_vmcb) 2684 return 1; 2685 2686 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 2687 skip_emulated_instruction(&svm->vcpu); 2688 2689 nested_svm_vmloadsave(svm->vmcb, nested_vmcb); 2690 nested_svm_unmap(page); 2691 2692 return 1; 2693} 2694 2695static int vmrun_interception(struct vcpu_svm *svm) 2696{ 2697 if (nested_svm_check_permissions(svm)) 2698 return 1; 2699 2700 /* Save rip after vmrun instruction */ 2701 kvm_rip_write(&svm->vcpu, kvm_rip_read(&svm->vcpu) + 3); 2702 2703 if (!nested_svm_vmrun(svm)) 2704 return 1; 2705 2706 if (!nested_svm_vmrun_msrpm(svm)) 2707 goto failed; 2708 2709 return 1; 2710 2711failed: 2712 2713 svm->vmcb->control.exit_code = SVM_EXIT_ERR; 2714 svm->vmcb->control.exit_code_hi = 0; 2715 svm->vmcb->control.exit_info_1 = 0; 2716 svm->vmcb->control.exit_info_2 = 0; 2717 2718 nested_svm_vmexit(svm); 2719 2720 return 1; 2721} 2722 2723static int stgi_interception(struct vcpu_svm *svm) 2724{ 2725 if (nested_svm_check_permissions(svm)) 2726 return 1; 2727 2728 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 2729 skip_emulated_instruction(&svm->vcpu); 2730 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); 2731 2732 enable_gif(svm); 2733 2734 return 1; 2735} 2736 2737static int clgi_interception(struct vcpu_svm *svm) 2738{ 2739 if (nested_svm_check_permissions(svm)) 2740 return 1; 2741 2742 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 2743 skip_emulated_instruction(&svm->vcpu); 2744 2745 disable_gif(svm); 2746 2747 /* After a CLGI no interrupts should come */ 2748 svm_clear_vintr(svm); 2749 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; 2750 2751 mark_dirty(svm->vmcb, VMCB_INTR); 2752 2753 return 1; 2754} 2755 2756static int invlpga_interception(struct vcpu_svm *svm) 2757{ 2758 struct kvm_vcpu *vcpu = &svm->vcpu; 2759 2760 trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX), 2761 kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); 2762 2763 /* Let's treat INVLPGA the same as INVLPG (can be optimized!) 
*/ 2764 kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); 2765 2766 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 2767 skip_emulated_instruction(&svm->vcpu); 2768 return 1; 2769} 2770 2771static int skinit_interception(struct vcpu_svm *svm) 2772{ 2773 trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); 2774 2775 kvm_queue_exception(&svm->vcpu, UD_VECTOR); 2776 return 1; 2777} 2778 2779static int wbinvd_interception(struct vcpu_svm *svm) 2780{ 2781 kvm_emulate_wbinvd(&svm->vcpu); 2782 return 1; 2783} 2784 2785static int xsetbv_interception(struct vcpu_svm *svm) 2786{ 2787 u64 new_bv = kvm_read_edx_eax(&svm->vcpu); 2788 u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); 2789 2790 if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) { 2791 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 2792 skip_emulated_instruction(&svm->vcpu); 2793 } 2794 2795 return 1; 2796} 2797 2798static int task_switch_interception(struct vcpu_svm *svm) 2799{ 2800 u16 tss_selector; 2801 int reason; 2802 int int_type = svm->vmcb->control.exit_int_info & 2803 SVM_EXITINTINFO_TYPE_MASK; 2804 int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK; 2805 uint32_t type = 2806 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK; 2807 uint32_t idt_v = 2808 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID; 2809 bool has_error_code = false; 2810 u32 error_code = 0; 2811 2812 tss_selector = (u16)svm->vmcb->control.exit_info_1; 2813 2814 if (svm->vmcb->control.exit_info_2 & 2815 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET)) 2816 reason = TASK_SWITCH_IRET; 2817 else if (svm->vmcb->control.exit_info_2 & 2818 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP)) 2819 reason = TASK_SWITCH_JMP; 2820 else if (idt_v) 2821 reason = TASK_SWITCH_GATE; 2822 else 2823 reason = TASK_SWITCH_CALL; 2824 2825 if (reason == TASK_SWITCH_GATE) { 2826 switch (type) { 2827 case SVM_EXITINTINFO_TYPE_NMI: 2828 svm->vcpu.arch.nmi_injected = false; 2829 break; 2830 case SVM_EXITINTINFO_TYPE_EXEPT: 2831 if (svm->vmcb->control.exit_info_2 & 2832 (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) { 2833 has_error_code = true; 2834 error_code = 2835 (u32)svm->vmcb->control.exit_info_2; 2836 } 2837 kvm_clear_exception_queue(&svm->vcpu); 2838 break; 2839 case SVM_EXITINTINFO_TYPE_INTR: 2840 kvm_clear_interrupt_queue(&svm->vcpu); 2841 break; 2842 default: 2843 break; 2844 } 2845 } 2846 2847 if (reason != TASK_SWITCH_GATE || 2848 int_type == SVM_EXITINTINFO_TYPE_SOFT || 2849 (int_type == SVM_EXITINTINFO_TYPE_EXEPT && 2850 (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) 2851 skip_emulated_instruction(&svm->vcpu); 2852 2853 if (int_type != SVM_EXITINTINFO_TYPE_SOFT) 2854 int_vec = -1; 2855 2856 if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason, 2857 has_error_code, error_code) == EMULATE_FAIL) { 2858 svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 2859 svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; 2860 svm->vcpu.run->internal.ndata = 0; 2861 return 0; 2862 } 2863 return 1; 2864} 2865 2866static int cpuid_interception(struct vcpu_svm *svm) 2867{ 2868 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; 2869 kvm_emulate_cpuid(&svm->vcpu); 2870 return 1; 2871} 2872 2873static int iret_interception(struct vcpu_svm *svm) 2874{ 2875 ++svm->vcpu.stat.nmi_window_exits; 2876 clr_intercept(svm, INTERCEPT_IRET); 2877 svm->vcpu.arch.hflags |= HF_IRET_MASK; 2878 svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu); 2879 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); 2880 return 1; 2881} 2882 
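/*
 * With decode assists the hardware supplies the linear address of the
 * INVLPG operand in exit_info_1, so the page can be invalidated directly;
 * without decode assists the instruction is handed back to the emulator.
 */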
2883static int invlpg_interception(struct vcpu_svm *svm) 2884{ 2885 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) 2886 return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; 2887 2888 kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1); 2889 skip_emulated_instruction(&svm->vcpu); 2890 return 1; 2891} 2892 2893static int emulate_on_interception(struct vcpu_svm *svm) 2894{ 2895 return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; 2896} 2897 2898static int rdpmc_interception(struct vcpu_svm *svm) 2899{ 2900 int err; 2901 2902 if (!static_cpu_has(X86_FEATURE_NRIPS)) 2903 return emulate_on_interception(svm); 2904 2905 err = kvm_rdpmc(&svm->vcpu); 2906 kvm_complete_insn_gp(&svm->vcpu, err); 2907 2908 return 1; 2909} 2910 2911static bool check_selective_cr0_intercepted(struct vcpu_svm *svm, 2912 unsigned long val) 2913{ 2914 unsigned long cr0 = svm->vcpu.arch.cr0; 2915 bool ret = false; 2916 u64 intercept; 2917 2918 intercept = svm->nested.intercept; 2919 2920 if (!is_guest_mode(&svm->vcpu) || 2921 (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))) 2922 return false; 2923 2924 cr0 &= ~SVM_CR0_SELECTIVE_MASK; 2925 val &= ~SVM_CR0_SELECTIVE_MASK; 2926 2927 if (cr0 ^ val) { 2928 svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; 2929 ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE); 2930 } 2931 2932 return ret; 2933} 2934 2935#define CR_VALID (1ULL << 63) 2936 2937static int cr_interception(struct vcpu_svm *svm) 2938{ 2939 int reg, cr; 2940 unsigned long val; 2941 int err; 2942 2943 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) 2944 return emulate_on_interception(svm); 2945 2946 if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0)) 2947 return emulate_on_interception(svm); 2948 2949 reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK; 2950 if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE) 2951 cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0; 2952 else 2953 cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0; 2954 2955 err = 0; 2956 if (cr >= 16) { /* mov to cr */ 2957 cr -= 16; 2958 val = kvm_register_read(&svm->vcpu, reg); 2959 switch (cr) { 2960 case 0: 2961 if (!check_selective_cr0_intercepted(svm, val)) 2962 err = kvm_set_cr0(&svm->vcpu, val); 2963 else 2964 return 1; 2965 2966 break; 2967 case 3: 2968 err = kvm_set_cr3(&svm->vcpu, val); 2969 break; 2970 case 4: 2971 err = kvm_set_cr4(&svm->vcpu, val); 2972 break; 2973 case 8: 2974 err = kvm_set_cr8(&svm->vcpu, val); 2975 break; 2976 default: 2977 WARN(1, "unhandled write to CR%d", cr); 2978 kvm_queue_exception(&svm->vcpu, UD_VECTOR); 2979 return 1; 2980 } 2981 } else { /* mov from cr */ 2982 switch (cr) { 2983 case 0: 2984 val = kvm_read_cr0(&svm->vcpu); 2985 break; 2986 case 2: 2987 val = svm->vcpu.arch.cr2; 2988 break; 2989 case 3: 2990 val = kvm_read_cr3(&svm->vcpu); 2991 break; 2992 case 4: 2993 val = kvm_read_cr4(&svm->vcpu); 2994 break; 2995 case 8: 2996 val = kvm_get_cr8(&svm->vcpu); 2997 break; 2998 default: 2999 WARN(1, "unhandled read from CR%d", cr); 3000 kvm_queue_exception(&svm->vcpu, UD_VECTOR); 3001 return 1; 3002 } 3003 kvm_register_write(&svm->vcpu, reg, val); 3004 } 3005 kvm_complete_insn_gp(&svm->vcpu, err); 3006 3007 return 1; 3008} 3009 3010static int dr_interception(struct vcpu_svm *svm) 3011{ 3012 int reg, dr; 3013 unsigned long val; 3014 3015 if (svm->vcpu.guest_debug == 0) { 3016 /* 3017 * No more DR vmexits; force a reload of the debug registers 3018 * and reenter on this instruction. The next vmexit will 3019 * retrieve the full state of the debug registers. 
3020 */ 3021 clr_dr_intercepts(svm); 3022 svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT; 3023 return 1; 3024 } 3025 3026 if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS)) 3027 return emulate_on_interception(svm); 3028 3029 reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK; 3030 dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0; 3031 3032 if (dr >= 16) { /* mov to DRn */ 3033 if (!kvm_require_dr(&svm->vcpu, dr - 16)) 3034 return 1; 3035 val = kvm_register_read(&svm->vcpu, reg); 3036 kvm_set_dr(&svm->vcpu, dr - 16, val); 3037 } else { 3038 if (!kvm_require_dr(&svm->vcpu, dr)) 3039 return 1; 3040 kvm_get_dr(&svm->vcpu, dr, &val); 3041 kvm_register_write(&svm->vcpu, reg, val); 3042 } 3043 3044 skip_emulated_instruction(&svm->vcpu); 3045 3046 return 1; 3047} 3048 3049static int cr8_write_interception(struct vcpu_svm *svm) 3050{ 3051 struct kvm_run *kvm_run = svm->vcpu.run; 3052 int r; 3053 3054 u8 cr8_prev = kvm_get_cr8(&svm->vcpu); 3055 /* instruction emulation calls kvm_set_cr8() */ 3056 r = cr_interception(svm); 3057 if (irqchip_in_kernel(svm->vcpu.kvm)) 3058 return r; 3059 if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) 3060 return r; 3061 kvm_run->exit_reason = KVM_EXIT_SET_TPR; 3062 return 0; 3063} 3064 3065static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) 3066{ 3067 struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); 3068 return vmcb->control.tsc_offset + 3069 svm_scale_tsc(vcpu, host_tsc); 3070} 3071 3072static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) 3073{ 3074 struct vcpu_svm *svm = to_svm(vcpu); 3075 3076 switch (ecx) { 3077 case MSR_IA32_TSC: { 3078 *data = svm->vmcb->control.tsc_offset + 3079 svm_scale_tsc(vcpu, native_read_tsc()); 3080 3081 break; 3082 } 3083 case MSR_STAR: 3084 *data = svm->vmcb->save.star; 3085 break; 3086#ifdef CONFIG_X86_64 3087 case MSR_LSTAR: 3088 *data = svm->vmcb->save.lstar; 3089 break; 3090 case MSR_CSTAR: 3091 *data = svm->vmcb->save.cstar; 3092 break; 3093 case MSR_KERNEL_GS_BASE: 3094 *data = svm->vmcb->save.kernel_gs_base; 3095 break; 3096 case MSR_SYSCALL_MASK: 3097 *data = svm->vmcb->save.sfmask; 3098 break; 3099#endif 3100 case MSR_IA32_SYSENTER_CS: 3101 *data = svm->vmcb->save.sysenter_cs; 3102 break; 3103 case MSR_IA32_SYSENTER_EIP: 3104 *data = svm->sysenter_eip; 3105 break; 3106 case MSR_IA32_SYSENTER_ESP: 3107 *data = svm->sysenter_esp; 3108 break; 3109 /* 3110 * Nobody will change the following 5 values in the VMCB so we can 3111 * safely return them on rdmsr. They will always be 0 until LBRV is 3112 * implemented. 
3113 */ 3114 case MSR_IA32_DEBUGCTLMSR: 3115 *data = svm->vmcb->save.dbgctl; 3116 break; 3117 case MSR_IA32_LASTBRANCHFROMIP: 3118 *data = svm->vmcb->save.br_from; 3119 break; 3120 case MSR_IA32_LASTBRANCHTOIP: 3121 *data = svm->vmcb->save.br_to; 3122 break; 3123 case MSR_IA32_LASTINTFROMIP: 3124 *data = svm->vmcb->save.last_excp_from; 3125 break; 3126 case MSR_IA32_LASTINTTOIP: 3127 *data = svm->vmcb->save.last_excp_to; 3128 break; 3129 case MSR_VM_HSAVE_PA: 3130 *data = svm->nested.hsave_msr; 3131 break; 3132 case MSR_VM_CR: 3133 *data = svm->nested.vm_cr_msr; 3134 break; 3135 case MSR_IA32_UCODE_REV: 3136 *data = 0x01000065; 3137 break; 3138 default: 3139 return kvm_get_msr_common(vcpu, ecx, data); 3140 } 3141 return 0; 3142} 3143 3144static int rdmsr_interception(struct vcpu_svm *svm) 3145{ 3146 u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); 3147 u64 data; 3148 3149 if (svm_get_msr(&svm->vcpu, ecx, &data)) { 3150 trace_kvm_msr_read_ex(ecx); 3151 kvm_inject_gp(&svm->vcpu, 0); 3152 } else { 3153 trace_kvm_msr_read(ecx, data); 3154 3155 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, data & 0xffffffff); 3156 kvm_register_write(&svm->vcpu, VCPU_REGS_RDX, data >> 32); 3157 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; 3158 skip_emulated_instruction(&svm->vcpu); 3159 } 3160 return 1; 3161} 3162 3163static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data) 3164{ 3165 struct vcpu_svm *svm = to_svm(vcpu); 3166 int svm_dis, chg_mask; 3167 3168 if (data & ~SVM_VM_CR_VALID_MASK) 3169 return 1; 3170 3171 chg_mask = SVM_VM_CR_VALID_MASK; 3172 3173 if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK) 3174 chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK); 3175 3176 svm->nested.vm_cr_msr &= ~chg_mask; 3177 svm->nested.vm_cr_msr |= (data & chg_mask); 3178 3179 svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK; 3180 3181 /* check for svm_disable while efer.svme is set */ 3182 if (svm_dis && (vcpu->arch.efer & EFER_SVME)) 3183 return 1; 3184 3185 return 0; 3186} 3187 3188static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) 3189{ 3190 struct vcpu_svm *svm = to_svm(vcpu); 3191 3192 u32 ecx = msr->index; 3193 u64 data = msr->data; 3194 switch (ecx) { 3195 case MSR_IA32_TSC: 3196 kvm_write_tsc(vcpu, msr); 3197 break; 3198 case MSR_STAR: 3199 svm->vmcb->save.star = data; 3200 break; 3201#ifdef CONFIG_X86_64 3202 case MSR_LSTAR: 3203 svm->vmcb->save.lstar = data; 3204 break; 3205 case MSR_CSTAR: 3206 svm->vmcb->save.cstar = data; 3207 break; 3208 case MSR_KERNEL_GS_BASE: 3209 svm->vmcb->save.kernel_gs_base = data; 3210 break; 3211 case MSR_SYSCALL_MASK: 3212 svm->vmcb->save.sfmask = data; 3213 break; 3214#endif 3215 case MSR_IA32_SYSENTER_CS: 3216 svm->vmcb->save.sysenter_cs = data; 3217 break; 3218 case MSR_IA32_SYSENTER_EIP: 3219 svm->sysenter_eip = data; 3220 svm->vmcb->save.sysenter_eip = data; 3221 break; 3222 case MSR_IA32_SYSENTER_ESP: 3223 svm->sysenter_esp = data; 3224 svm->vmcb->save.sysenter_esp = data; 3225 break; 3226 case MSR_IA32_DEBUGCTLMSR: 3227 if (!boot_cpu_has(X86_FEATURE_LBRV)) { 3228 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", 3229 __func__, data); 3230 break; 3231 } 3232 if (data & DEBUGCTL_RESERVED_BITS) 3233 return 1; 3234 3235 svm->vmcb->save.dbgctl = data; 3236 mark_dirty(svm->vmcb, VMCB_LBR); 3237 if (data & (1ULL<<0)) 3238 svm_enable_lbrv(svm); 3239 else 3240 svm_disable_lbrv(svm); 3241 break; 3242 case MSR_VM_HSAVE_PA: 3243 svm->nested.hsave_msr = data; 3244 break; 3245 case MSR_VM_CR: 3246 return svm_set_vm_cr(vcpu, data); 
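	/* Writes to VM_IGNNE are not emulated; log the attempt and ignore the value. */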
3247 case MSR_VM_IGNNE: 3248 vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); 3249 break; 3250 default: 3251 return kvm_set_msr_common(vcpu, msr); 3252 } 3253 return 0; 3254} 3255 3256static int wrmsr_interception(struct vcpu_svm *svm) 3257{ 3258 struct msr_data msr; 3259 u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); 3260 u64 data = kvm_read_edx_eax(&svm->vcpu); 3261 3262 msr.data = data; 3263 msr.index = ecx; 3264 msr.host_initiated = false; 3265 3266 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; 3267 if (kvm_set_msr(&svm->vcpu, &msr)) { 3268 trace_kvm_msr_write_ex(ecx, data); 3269 kvm_inject_gp(&svm->vcpu, 0); 3270 } else { 3271 trace_kvm_msr_write(ecx, data); 3272 skip_emulated_instruction(&svm->vcpu); 3273 } 3274 return 1; 3275} 3276 3277static int msr_interception(struct vcpu_svm *svm) 3278{ 3279 if (svm->vmcb->control.exit_info_1) 3280 return wrmsr_interception(svm); 3281 else 3282 return rdmsr_interception(svm); 3283} 3284 3285static int interrupt_window_interception(struct vcpu_svm *svm) 3286{ 3287 struct kvm_run *kvm_run = svm->vcpu.run; 3288 3289 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); 3290 svm_clear_vintr(svm); 3291 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; 3292 mark_dirty(svm->vmcb, VMCB_INTR); 3293 ++svm->vcpu.stat.irq_window_exits; 3294 /* 3295 * If the user space waits to inject interrupts, exit as soon as 3296 * possible 3297 */ 3298 if (!irqchip_in_kernel(svm->vcpu.kvm) && 3299 kvm_run->request_interrupt_window && 3300 !kvm_cpu_has_interrupt(&svm->vcpu)) { 3301 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; 3302 return 0; 3303 } 3304 3305 return 1; 3306} 3307 3308static int pause_interception(struct vcpu_svm *svm) 3309{ 3310 kvm_vcpu_on_spin(&(svm->vcpu)); 3311 return 1; 3312} 3313 3314static int nop_interception(struct vcpu_svm *svm) 3315{ 3316 skip_emulated_instruction(&(svm->vcpu)); 3317 return 1; 3318} 3319 3320static int monitor_interception(struct vcpu_svm *svm) 3321{ 3322 printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n"); 3323 return nop_interception(svm); 3324} 3325 3326static int mwait_interception(struct vcpu_svm *svm) 3327{ 3328 printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n"); 3329 return nop_interception(svm); 3330} 3331 3332static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { 3333 [SVM_EXIT_READ_CR0] = cr_interception, 3334 [SVM_EXIT_READ_CR3] = cr_interception, 3335 [SVM_EXIT_READ_CR4] = cr_interception, 3336 [SVM_EXIT_READ_CR8] = cr_interception, 3337 [SVM_EXIT_CR0_SEL_WRITE] = cr_interception, 3338 [SVM_EXIT_WRITE_CR0] = cr_interception, 3339 [SVM_EXIT_WRITE_CR3] = cr_interception, 3340 [SVM_EXIT_WRITE_CR4] = cr_interception, 3341 [SVM_EXIT_WRITE_CR8] = cr8_write_interception, 3342 [SVM_EXIT_READ_DR0] = dr_interception, 3343 [SVM_EXIT_READ_DR1] = dr_interception, 3344 [SVM_EXIT_READ_DR2] = dr_interception, 3345 [SVM_EXIT_READ_DR3] = dr_interception, 3346 [SVM_EXIT_READ_DR4] = dr_interception, 3347 [SVM_EXIT_READ_DR5] = dr_interception, 3348 [SVM_EXIT_READ_DR6] = dr_interception, 3349 [SVM_EXIT_READ_DR7] = dr_interception, 3350 [SVM_EXIT_WRITE_DR0] = dr_interception, 3351 [SVM_EXIT_WRITE_DR1] = dr_interception, 3352 [SVM_EXIT_WRITE_DR2] = dr_interception, 3353 [SVM_EXIT_WRITE_DR3] = dr_interception, 3354 [SVM_EXIT_WRITE_DR4] = dr_interception, 3355 [SVM_EXIT_WRITE_DR5] = dr_interception, 3356 [SVM_EXIT_WRITE_DR6] = dr_interception, 3357 [SVM_EXIT_WRITE_DR7] = dr_interception, 3358 [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, 3359 [SVM_EXIT_EXCP_BASE + 
BP_VECTOR] = bp_interception, 3360 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, 3361 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, 3362 [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, 3363 [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, 3364 [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception, 3365 [SVM_EXIT_INTR] = intr_interception, 3366 [SVM_EXIT_NMI] = nmi_interception, 3367 [SVM_EXIT_SMI] = nop_on_interception, 3368 [SVM_EXIT_INIT] = nop_on_interception, 3369 [SVM_EXIT_VINTR] = interrupt_window_interception, 3370 [SVM_EXIT_RDPMC] = rdpmc_interception, 3371 [SVM_EXIT_CPUID] = cpuid_interception, 3372 [SVM_EXIT_IRET] = iret_interception, 3373 [SVM_EXIT_INVD] = emulate_on_interception, 3374 [SVM_EXIT_PAUSE] = pause_interception, 3375 [SVM_EXIT_HLT] = halt_interception, 3376 [SVM_EXIT_INVLPG] = invlpg_interception, 3377 [SVM_EXIT_INVLPGA] = invlpga_interception, 3378 [SVM_EXIT_IOIO] = io_interception, 3379 [SVM_EXIT_MSR] = msr_interception, 3380 [SVM_EXIT_TASK_SWITCH] = task_switch_interception, 3381 [SVM_EXIT_SHUTDOWN] = shutdown_interception, 3382 [SVM_EXIT_VMRUN] = vmrun_interception, 3383 [SVM_EXIT_VMMCALL] = vmmcall_interception, 3384 [SVM_EXIT_VMLOAD] = vmload_interception, 3385 [SVM_EXIT_VMSAVE] = vmsave_interception, 3386 [SVM_EXIT_STGI] = stgi_interception, 3387 [SVM_EXIT_CLGI] = clgi_interception, 3388 [SVM_EXIT_SKINIT] = skinit_interception, 3389 [SVM_EXIT_WBINVD] = wbinvd_interception, 3390 [SVM_EXIT_MONITOR] = monitor_interception, 3391 [SVM_EXIT_MWAIT] = mwait_interception, 3392 [SVM_EXIT_XSETBV] = xsetbv_interception, 3393 [SVM_EXIT_NPF] = pf_interception, 3394}; 3395 3396static void dump_vmcb(struct kvm_vcpu *vcpu) 3397{ 3398 struct vcpu_svm *svm = to_svm(vcpu); 3399 struct vmcb_control_area *control = &svm->vmcb->control; 3400 struct vmcb_save_area *save = &svm->vmcb->save; 3401 3402 pr_err("VMCB Control Area:\n"); 3403 pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff); 3404 pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16); 3405 pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff); 3406 pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16); 3407 pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions); 3408 pr_err("%-20s%016llx\n", "intercepts:", control->intercept); 3409 pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count); 3410 pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa); 3411 pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa); 3412 pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset); 3413 pr_err("%-20s%d\n", "asid:", control->asid); 3414 pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl); 3415 pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl); 3416 pr_err("%-20s%08x\n", "int_vector:", control->int_vector); 3417 pr_err("%-20s%08x\n", "int_state:", control->int_state); 3418 pr_err("%-20s%08x\n", "exit_code:", control->exit_code); 3419 pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1); 3420 pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2); 3421 pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info); 3422 pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err); 3423 pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl); 3424 pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3); 3425 pr_err("%-20s%08x\n", "event_inj:", control->event_inj); 3426 pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err); 3427 pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl); 
3428 pr_err("%-20s%016llx\n", "next_rip:", control->next_rip); 3429 pr_err("VMCB State Save Area:\n"); 3430 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3431 "es:", 3432 save->es.selector, save->es.attrib, 3433 save->es.limit, save->es.base); 3434 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3435 "cs:", 3436 save->cs.selector, save->cs.attrib, 3437 save->cs.limit, save->cs.base); 3438 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3439 "ss:", 3440 save->ss.selector, save->ss.attrib, 3441 save->ss.limit, save->ss.base); 3442 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3443 "ds:", 3444 save->ds.selector, save->ds.attrib, 3445 save->ds.limit, save->ds.base); 3446 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3447 "fs:", 3448 save->fs.selector, save->fs.attrib, 3449 save->fs.limit, save->fs.base); 3450 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3451 "gs:", 3452 save->gs.selector, save->gs.attrib, 3453 save->gs.limit, save->gs.base); 3454 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3455 "gdtr:", 3456 save->gdtr.selector, save->gdtr.attrib, 3457 save->gdtr.limit, save->gdtr.base); 3458 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3459 "ldtr:", 3460 save->ldtr.selector, save->ldtr.attrib, 3461 save->ldtr.limit, save->ldtr.base); 3462 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3463 "idtr:", 3464 save->idtr.selector, save->idtr.attrib, 3465 save->idtr.limit, save->idtr.base); 3466 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3467 "tr:", 3468 save->tr.selector, save->tr.attrib, 3469 save->tr.limit, save->tr.base); 3470 pr_err("cpl: %d efer: %016llx\n", 3471 save->cpl, save->efer); 3472 pr_err("%-15s %016llx %-13s %016llx\n", 3473 "cr0:", save->cr0, "cr2:", save->cr2); 3474 pr_err("%-15s %016llx %-13s %016llx\n", 3475 "cr3:", save->cr3, "cr4:", save->cr4); 3476 pr_err("%-15s %016llx %-13s %016llx\n", 3477 "dr6:", save->dr6, "dr7:", save->dr7); 3478 pr_err("%-15s %016llx %-13s %016llx\n", 3479 "rip:", save->rip, "rflags:", save->rflags); 3480 pr_err("%-15s %016llx %-13s %016llx\n", 3481 "rsp:", save->rsp, "rax:", save->rax); 3482 pr_err("%-15s %016llx %-13s %016llx\n", 3483 "star:", save->star, "lstar:", save->lstar); 3484 pr_err("%-15s %016llx %-13s %016llx\n", 3485 "cstar:", save->cstar, "sfmask:", save->sfmask); 3486 pr_err("%-15s %016llx %-13s %016llx\n", 3487 "kernel_gs_base:", save->kernel_gs_base, 3488 "sysenter_cs:", save->sysenter_cs); 3489 pr_err("%-15s %016llx %-13s %016llx\n", 3490 "sysenter_esp:", save->sysenter_esp, 3491 "sysenter_eip:", save->sysenter_eip); 3492 pr_err("%-15s %016llx %-13s %016llx\n", 3493 "gpat:", save->g_pat, "dbgctl:", save->dbgctl); 3494 pr_err("%-15s %016llx %-13s %016llx\n", 3495 "br_from:", save->br_from, "br_to:", save->br_to); 3496 pr_err("%-15s %016llx %-13s %016llx\n", 3497 "excp_from:", save->last_excp_from, 3498 "excp_to:", save->last_excp_to); 3499} 3500 3501static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) 3502{ 3503 struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control; 3504 3505 *info1 = control->exit_info_1; 3506 *info2 = control->exit_info_2; 3507} 3508 3509static int handle_exit(struct kvm_vcpu *vcpu) 3510{ 3511 struct vcpu_svm *svm = to_svm(vcpu); 3512 struct kvm_run *kvm_run = vcpu->run; 3513 u32 exit_code = svm->vmcb->control.exit_code; 3514 3515 if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) 3516 vcpu->arch.cr0 = svm->vmcb->save.cr0; 3517 if (npt_enabled) 3518 vcpu->arch.cr3 = svm->vmcb->save.cr3; 3519 3520 if 
(unlikely(svm->nested.exit_required)) { 3521 nested_svm_vmexit(svm); 3522 svm->nested.exit_required = false; 3523 3524 return 1; 3525 } 3526 3527 if (is_guest_mode(vcpu)) { 3528 int vmexit; 3529 3530 trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code, 3531 svm->vmcb->control.exit_info_1, 3532 svm->vmcb->control.exit_info_2, 3533 svm->vmcb->control.exit_int_info, 3534 svm->vmcb->control.exit_int_info_err, 3535 KVM_ISA_SVM); 3536 3537 vmexit = nested_svm_exit_special(svm); 3538 3539 if (vmexit == NESTED_EXIT_CONTINUE) 3540 vmexit = nested_svm_exit_handled(svm); 3541 3542 if (vmexit == NESTED_EXIT_DONE) 3543 return 1; 3544 } 3545 3546 svm_complete_interrupts(svm); 3547 3548 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { 3549 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; 3550 kvm_run->fail_entry.hardware_entry_failure_reason 3551 = svm->vmcb->control.exit_code; 3552 pr_err("KVM: FAILED VMRUN WITH VMCB:\n"); 3553 dump_vmcb(vcpu); 3554 return 0; 3555 } 3556 3557 if (is_external_interrupt(svm->vmcb->control.exit_int_info) && 3558 exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && 3559 exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH && 3560 exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI) 3561 printk(KERN_ERR "%s: unexpected exit_int_info 0x%x " 3562 "exit_code 0x%x\n", 3563 __func__, svm->vmcb->control.exit_int_info, 3564 exit_code); 3565 3566 if (exit_code >= ARRAY_SIZE(svm_exit_handlers) 3567 || !svm_exit_handlers[exit_code]) { 3568 WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code); 3569 kvm_queue_exception(vcpu, UD_VECTOR); 3570 return 1; 3571 } 3572 3573 return svm_exit_handlers[exit_code](svm); 3574} 3575 3576static void reload_tss(struct kvm_vcpu *vcpu) 3577{ 3578 int cpu = raw_smp_processor_id(); 3579 3580 struct svm_cpu_data *sd = per_cpu(svm_data, cpu); 3581 sd->tss_desc->type = 9; /* available 32/64-bit TSS */ 3582 load_TR_desc(); 3583} 3584 3585static void pre_svm_run(struct vcpu_svm *svm) 3586{ 3587 int cpu = raw_smp_processor_id(); 3588 3589 struct svm_cpu_data *sd = per_cpu(svm_data, cpu); 3590 3591 /* FIXME: handle wraparound of asid_generation */ 3592 if (svm->asid_generation != sd->asid_generation) 3593 new_asid(svm, sd); 3594} 3595 3596static void svm_inject_nmi(struct kvm_vcpu *vcpu) 3597{ 3598 struct vcpu_svm *svm = to_svm(vcpu); 3599 3600 svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; 3601 vcpu->arch.hflags |= HF_NMI_MASK; 3602 set_intercept(svm, INTERCEPT_IRET); 3603 ++vcpu->stat.nmi_injections; 3604} 3605 3606static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) 3607{ 3608 struct vmcb_control_area *control; 3609 3610 control = &svm->vmcb->control; 3611 control->int_vector = irq; 3612 control->int_ctl &= ~V_INTR_PRIO_MASK; 3613 control->int_ctl |= V_IRQ_MASK | 3614 ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); 3615 mark_dirty(svm->vmcb, VMCB_INTR); 3616} 3617 3618static void svm_set_irq(struct kvm_vcpu *vcpu) 3619{ 3620 struct vcpu_svm *svm = to_svm(vcpu); 3621 3622 BUG_ON(!(gif_set(svm))); 3623 3624 trace_kvm_inj_virq(vcpu->arch.interrupt.nr); 3625 ++vcpu->stat.irq_injections; 3626 3627 svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr | 3628 SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR; 3629} 3630 3631static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) 3632{ 3633 struct vcpu_svm *svm = to_svm(vcpu); 3634 3635 if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) 3636 return; 3637 3638 clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); 3639 3640 if (irr == -1) 3641 
return;
3642
3643 if (tpr >= irr)
3644 set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
3645}
3646
3647static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
3648{
3649 return;
3650}
3651
3652static int svm_vm_has_apicv(struct kvm *kvm)
3653{
3654 return 0;
3655}
3656
3657static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
3658{
3659 return;
3660}
3661
3662static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
3663{
3664 return;
3665}
3666
3667static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
3668{
3669 struct vcpu_svm *svm = to_svm(vcpu);
3670 struct vmcb *vmcb = svm->vmcb;
3671 int ret;
3672 ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
3673 !(svm->vcpu.arch.hflags & HF_NMI_MASK);
3674 ret = ret && gif_set(svm) && nested_svm_nmi(svm);
3675
3676 return ret;
3677}
3678
3679static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
3680{
3681 struct vcpu_svm *svm = to_svm(vcpu);
3682
3683 return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
3684}
3685
3686static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
3687{
3688 struct vcpu_svm *svm = to_svm(vcpu);
3689
3690 if (masked) {
3691 svm->vcpu.arch.hflags |= HF_NMI_MASK;
3692 set_intercept(svm, INTERCEPT_IRET);
3693 } else {
3694 svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
3695 clr_intercept(svm, INTERCEPT_IRET);
3696 }
3697}
3698
3699static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
3700{
3701 struct vcpu_svm *svm = to_svm(vcpu);
3702 struct vmcb *vmcb = svm->vmcb;
3703 int ret;
3704
3705 if (!gif_set(svm) ||
3706 (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
3707 return 0;
3708
3709 ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF);
3710
3711 if (is_guest_mode(vcpu))
3712 return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
3713
3714 return ret;
3715}
3716
3717static void enable_irq_window(struct kvm_vcpu *vcpu)
3718{
3719 struct vcpu_svm *svm = to_svm(vcpu);
3720
3721 /*
3722 * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes
3723 * 1, because that's a separate STGI/VMRUN intercept. The next time we
3724 * get that intercept, this function will be called again though and
3725 * we'll get the vintr intercept.
3726 */
3727 if (gif_set(svm) && nested_svm_intr(svm)) {
3728 svm_set_vintr(svm);
3729 svm_inject_irq(svm, 0x0);
3730 }
3731}
3732
3733static void enable_nmi_window(struct kvm_vcpu *vcpu)
3734{
3735 struct vcpu_svm *svm = to_svm(vcpu);
3736
3737 if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
3738 == HF_NMI_MASK)
3739 return; /* IRET will cause a vm exit */
3740
3741 /*
3742 * Something prevents NMI from being injected.
Single step over possible 3743 * problem (IRET or exception injection or interrupt shadow) 3744 */ 3745 svm->nmi_singlestep = true; 3746 svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); 3747} 3748 3749static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) 3750{ 3751 return 0; 3752} 3753 3754static void svm_flush_tlb(struct kvm_vcpu *vcpu) 3755{ 3756 struct vcpu_svm *svm = to_svm(vcpu); 3757 3758 if (static_cpu_has(X86_FEATURE_FLUSHBYASID)) 3759 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID; 3760 else 3761 svm->asid_generation--; 3762} 3763 3764static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) 3765{ 3766} 3767 3768static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) 3769{ 3770 struct vcpu_svm *svm = to_svm(vcpu); 3771 3772 if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) 3773 return; 3774 3775 if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) { 3776 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; 3777 kvm_set_cr8(vcpu, cr8); 3778 } 3779} 3780 3781static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) 3782{ 3783 struct vcpu_svm *svm = to_svm(vcpu); 3784 u64 cr8; 3785 3786 if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) 3787 return; 3788 3789 cr8 = kvm_get_cr8(vcpu); 3790 svm->vmcb->control.int_ctl &= ~V_TPR_MASK; 3791 svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; 3792} 3793 3794static void svm_complete_interrupts(struct vcpu_svm *svm) 3795{ 3796 u8 vector; 3797 int type; 3798 u32 exitintinfo = svm->vmcb->control.exit_int_info; 3799 unsigned int3_injected = svm->int3_injected; 3800 3801 svm->int3_injected = 0; 3802 3803 /* 3804 * If we've made progress since setting HF_IRET_MASK, we've 3805 * executed an IRET and can allow NMI injection. 3806 */ 3807 if ((svm->vcpu.arch.hflags & HF_IRET_MASK) 3808 && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) { 3809 svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); 3810 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); 3811 } 3812 3813 svm->vcpu.arch.nmi_injected = false; 3814 kvm_clear_exception_queue(&svm->vcpu); 3815 kvm_clear_interrupt_queue(&svm->vcpu); 3816 3817 if (!(exitintinfo & SVM_EXITINTINFO_VALID)) 3818 return; 3819 3820 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); 3821 3822 vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK; 3823 type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK; 3824 3825 switch (type) { 3826 case SVM_EXITINTINFO_TYPE_NMI: 3827 svm->vcpu.arch.nmi_injected = true; 3828 break; 3829 case SVM_EXITINTINFO_TYPE_EXEPT: 3830 /* 3831 * In case of software exceptions, do not reinject the vector, 3832 * but re-execute the instruction instead. Rewind RIP first 3833 * if we emulated INT3 before. 
3834 */ 3835 if (kvm_exception_is_soft(vector)) { 3836 if (vector == BP_VECTOR && int3_injected && 3837 kvm_is_linear_rip(&svm->vcpu, svm->int3_rip)) 3838 kvm_rip_write(&svm->vcpu, 3839 kvm_rip_read(&svm->vcpu) - 3840 int3_injected); 3841 break; 3842 } 3843 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { 3844 u32 err = svm->vmcb->control.exit_int_info_err; 3845 kvm_requeue_exception_e(&svm->vcpu, vector, err); 3846 3847 } else 3848 kvm_requeue_exception(&svm->vcpu, vector); 3849 break; 3850 case SVM_EXITINTINFO_TYPE_INTR: 3851 kvm_queue_interrupt(&svm->vcpu, vector, false); 3852 break; 3853 default: 3854 break; 3855 } 3856} 3857 3858static void svm_cancel_injection(struct kvm_vcpu *vcpu) 3859{ 3860 struct vcpu_svm *svm = to_svm(vcpu); 3861 struct vmcb_control_area *control = &svm->vmcb->control; 3862 3863 control->exit_int_info = control->event_inj; 3864 control->exit_int_info_err = control->event_inj_err; 3865 control->event_inj = 0; 3866 svm_complete_interrupts(svm); 3867} 3868 3869static void svm_vcpu_run(struct kvm_vcpu *vcpu) 3870{ 3871 struct vcpu_svm *svm = to_svm(vcpu); 3872 3873 svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; 3874 svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; 3875 svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; 3876 3877 /* 3878 * A vmexit emulation is required before the vcpu can be executed 3879 * again. 3880 */ 3881 if (unlikely(svm->nested.exit_required)) 3882 return; 3883 3884 pre_svm_run(svm); 3885 3886 sync_lapic_to_cr8(vcpu); 3887 3888 svm->vmcb->save.cr2 = vcpu->arch.cr2; 3889 3890 clgi(); 3891 3892 local_irq_enable(); 3893 3894 asm volatile ( 3895 "push %%" _ASM_BP "; \n\t" 3896 "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" 3897 "mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t" 3898 "mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t" 3899 "mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t" 3900 "mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t" 3901 "mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t" 3902#ifdef CONFIG_X86_64 3903 "mov %c[r8](%[svm]), %%r8 \n\t" 3904 "mov %c[r9](%[svm]), %%r9 \n\t" 3905 "mov %c[r10](%[svm]), %%r10 \n\t" 3906 "mov %c[r11](%[svm]), %%r11 \n\t" 3907 "mov %c[r12](%[svm]), %%r12 \n\t" 3908 "mov %c[r13](%[svm]), %%r13 \n\t" 3909 "mov %c[r14](%[svm]), %%r14 \n\t" 3910 "mov %c[r15](%[svm]), %%r15 \n\t" 3911#endif 3912 3913 /* Enter guest mode */ 3914 "push %%" _ASM_AX " \n\t" 3915 "mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t" 3916 __ex(SVM_VMLOAD) "\n\t" 3917 __ex(SVM_VMRUN) "\n\t" 3918 __ex(SVM_VMSAVE) "\n\t" 3919 "pop %%" _ASM_AX " \n\t" 3920 3921 /* Save guest registers, load host registers */ 3922 "mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t" 3923 "mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t" 3924 "mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t" 3925 "mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t" 3926 "mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t" 3927 "mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t" 3928#ifdef CONFIG_X86_64 3929 "mov %%r8, %c[r8](%[svm]) \n\t" 3930 "mov %%r9, %c[r9](%[svm]) \n\t" 3931 "mov %%r10, %c[r10](%[svm]) \n\t" 3932 "mov %%r11, %c[r11](%[svm]) \n\t" 3933 "mov %%r12, %c[r12](%[svm]) \n\t" 3934 "mov %%r13, %c[r13](%[svm]) \n\t" 3935 "mov %%r14, %c[r14](%[svm]) \n\t" 3936 "mov %%r15, %c[r15](%[svm]) \n\t" 3937#endif 3938 "pop %%" _ASM_BP 3939 : 3940 : [svm]"a"(svm), 3941 [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), 3942 [rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])), 3943 [rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])), 3944 [rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])), 3945 [rsi]"i"(offsetof(struct 
vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])), 3946 [rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])), 3947 [rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP])) 3948#ifdef CONFIG_X86_64 3949 , [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])), 3950 [r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])), 3951 [r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])), 3952 [r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])), 3953 [r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])), 3954 [r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])), 3955 [r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])), 3956 [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15])) 3957#endif 3958 : "cc", "memory" 3959#ifdef CONFIG_X86_64 3960 , "rbx", "rcx", "rdx", "rsi", "rdi" 3961 , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15" 3962#else 3963 , "ebx", "ecx", "edx", "esi", "edi" 3964#endif 3965 ); 3966 3967#ifdef CONFIG_X86_64 3968 wrmsrl(MSR_GS_BASE, svm->host.gs_base); 3969#else 3970 loadsegment(fs, svm->host.fs); 3971#ifndef CONFIG_X86_32_LAZY_GS 3972 loadsegment(gs, svm->host.gs); 3973#endif 3974#endif 3975 3976 reload_tss(vcpu); 3977 3978 local_irq_disable(); 3979 3980 vcpu->arch.cr2 = svm->vmcb->save.cr2; 3981 vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; 3982 vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; 3983 vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; 3984 3985 trace_kvm_exit(svm->vmcb->control.exit_code, vcpu, KVM_ISA_SVM); 3986 3987 if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) 3988 kvm_before_handle_nmi(&svm->vcpu); 3989 3990 stgi(); 3991 3992 /* Any pending NMI will happen here */ 3993 3994 if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) 3995 kvm_after_handle_nmi(&svm->vcpu); 3996 3997 sync_cr8_to_lapic(vcpu); 3998 3999 svm->next_rip = 0; 4000 4001 svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; 4002 4003 /* if exit due to PF check for async PF */ 4004 if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) 4005 svm->apf_reason = kvm_read_and_reset_pf_reason(); 4006 4007 if (npt_enabled) { 4008 vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR); 4009 vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR); 4010 } 4011 4012 /* 4013 * We need to handle MC intercepts here before the vcpu has a chance to 4014 * change the physical cpu 4015 */ 4016 if (unlikely(svm->vmcb->control.exit_code == 4017 SVM_EXIT_EXCP_BASE + MC_VECTOR)) 4018 svm_handle_mce(svm); 4019 4020 mark_all_clean(svm->vmcb); 4021} 4022 4023static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) 4024{ 4025 struct vcpu_svm *svm = to_svm(vcpu); 4026 4027 svm->vmcb->save.cr3 = root; 4028 mark_dirty(svm->vmcb, VMCB_CR); 4029 svm_flush_tlb(vcpu); 4030} 4031 4032static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) 4033{ 4034 struct vcpu_svm *svm = to_svm(vcpu); 4035 4036 svm->vmcb->control.nested_cr3 = root; 4037 mark_dirty(svm->vmcb, VMCB_NPT); 4038 4039 /* Also sync guest cr3 here in case we live migrate */ 4040 svm->vmcb->save.cr3 = kvm_read_cr3(vcpu); 4041 mark_dirty(svm->vmcb, VMCB_CR); 4042 4043 svm_flush_tlb(vcpu); 4044} 4045 4046static int is_disabled(void) 4047{ 4048 u64 vm_cr; 4049 4050 rdmsrl(MSR_VM_CR, vm_cr); 4051 if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE)) 4052 return 1; 4053 4054 return 0; 4055} 4056 4057static void 4058svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) 4059{ 4060 /* 4061 * Patch 
static void
svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
{
	/*
	 * Patch in the VMMCALL instruction:
	 */
	hypercall[0] = 0x0f;
	hypercall[1] = 0x01;
	hypercall[2] = 0xd9;
}

static void svm_check_processor_compat(void *rtn)
{
	*(int *)rtn = 0;
}

static bool svm_cpu_has_accelerated_tpr(void)
{
	return false;
}

static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
{
	return 0;
}

static void svm_cpuid_update(struct kvm_vcpu *vcpu)
{
}

static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
{
	switch (func) {
	case 0x80000001:
		if (nested)
			entry->ecx |= (1 << 2); /* Set SVM bit */
		break;
	case 0x8000000A:
		entry->eax = 1; /* SVM revision 1 */
		entry->ebx = 8; /* Let's support 8 ASIDs in case we add proper
				   ASID emulation to nested SVM */
		entry->ecx = 0; /* Reserved */
		entry->edx = 0; /* By default do not support any
				   additional features */

		/* Support next_rip if host supports it */
		if (boot_cpu_has(X86_FEATURE_NRIPS))
			entry->edx |= SVM_FEATURE_NRIP;

		/* Support NPT for the guest if enabled */
		if (npt_enabled)
			entry->edx |= SVM_FEATURE_NPT;

		break;
	}
}

static int svm_get_lpage_level(void)
{
	return PT_PDPE_LEVEL;
}

static bool svm_rdtscp_supported(void)
{
	return false;
}

static bool svm_invpcid_supported(void)
{
	return false;
}

static bool svm_mpx_supported(void)
{
	return false;
}

static bool svm_xsaves_supported(void)
{
	return false;
}

static bool svm_has_wbinvd_exit(void)
{
	return true;
}

static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	set_exception_intercept(svm, NM_VECTOR);
	update_cr0_intercept(svm);
}
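/*
 * Map the emulator's x86_intercept codes to the equivalent SVM exit codes,
 * together with the emulation stage (pre-exception, post-exception or
 * post-memory-access) at which the corresponding intercept is checked.
 * The table is consumed by svm_check_intercept() below.
 */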
#define PRE_EX(exit)  { .exit_code = (exit), \
			.stage = X86_ICPT_PRE_EXCEPT, }
#define POST_EX(exit) { .exit_code = (exit), \
			.stage = X86_ICPT_POST_EXCEPT, }
#define POST_MEM(exit) { .exit_code = (exit), \
			.stage = X86_ICPT_POST_MEMACCESS, }

static const struct __x86_intercept {
	u32 exit_code;
	enum x86_intercept_stage stage;
} x86_intercept_map[] = {
	[x86_intercept_cr_read]		= POST_EX(SVM_EXIT_READ_CR0),
	[x86_intercept_cr_write]	= POST_EX(SVM_EXIT_WRITE_CR0),
	[x86_intercept_clts]		= POST_EX(SVM_EXIT_WRITE_CR0),
	[x86_intercept_lmsw]		= POST_EX(SVM_EXIT_WRITE_CR0),
	[x86_intercept_smsw]		= POST_EX(SVM_EXIT_READ_CR0),
	[x86_intercept_dr_read]		= POST_EX(SVM_EXIT_READ_DR0),
	[x86_intercept_dr_write]	= POST_EX(SVM_EXIT_WRITE_DR0),
	[x86_intercept_sldt]		= POST_EX(SVM_EXIT_LDTR_READ),
	[x86_intercept_str]		= POST_EX(SVM_EXIT_TR_READ),
	[x86_intercept_lldt]		= POST_EX(SVM_EXIT_LDTR_WRITE),
	[x86_intercept_ltr]		= POST_EX(SVM_EXIT_TR_WRITE),
	[x86_intercept_sgdt]		= POST_EX(SVM_EXIT_GDTR_READ),
	[x86_intercept_sidt]		= POST_EX(SVM_EXIT_IDTR_READ),
	[x86_intercept_lgdt]		= POST_EX(SVM_EXIT_GDTR_WRITE),
	[x86_intercept_lidt]		= POST_EX(SVM_EXIT_IDTR_WRITE),
	[x86_intercept_vmrun]		= POST_EX(SVM_EXIT_VMRUN),
	[x86_intercept_vmmcall]		= POST_EX(SVM_EXIT_VMMCALL),
	[x86_intercept_vmload]		= POST_EX(SVM_EXIT_VMLOAD),
	[x86_intercept_vmsave]		= POST_EX(SVM_EXIT_VMSAVE),
	[x86_intercept_stgi]		= POST_EX(SVM_EXIT_STGI),
	[x86_intercept_clgi]		= POST_EX(SVM_EXIT_CLGI),
	[x86_intercept_skinit]		= POST_EX(SVM_EXIT_SKINIT),
	[x86_intercept_invlpga]		= POST_EX(SVM_EXIT_INVLPGA),
	[x86_intercept_rdtscp]		= POST_EX(SVM_EXIT_RDTSCP),
	[x86_intercept_monitor]		= POST_MEM(SVM_EXIT_MONITOR),
	[x86_intercept_mwait]		= POST_EX(SVM_EXIT_MWAIT),
	[x86_intercept_invlpg]		= POST_EX(SVM_EXIT_INVLPG),
	[x86_intercept_invd]		= POST_EX(SVM_EXIT_INVD),
	[x86_intercept_wbinvd]		= POST_EX(SVM_EXIT_WBINVD),
	[x86_intercept_wrmsr]		= POST_EX(SVM_EXIT_MSR),
	[x86_intercept_rdtsc]		= POST_EX(SVM_EXIT_RDTSC),
	[x86_intercept_rdmsr]		= POST_EX(SVM_EXIT_MSR),
	[x86_intercept_rdpmc]		= POST_EX(SVM_EXIT_RDPMC),
	[x86_intercept_cpuid]		= PRE_EX(SVM_EXIT_CPUID),
	[x86_intercept_rsm]		= PRE_EX(SVM_EXIT_RSM),
	[x86_intercept_pause]		= PRE_EX(SVM_EXIT_PAUSE),
	[x86_intercept_pushf]		= PRE_EX(SVM_EXIT_PUSHF),
	[x86_intercept_popf]		= PRE_EX(SVM_EXIT_POPF),
	[x86_intercept_intn]		= PRE_EX(SVM_EXIT_SWINT),
	[x86_intercept_iret]		= PRE_EX(SVM_EXIT_IRET),
	[x86_intercept_icebp]		= PRE_EX(SVM_EXIT_ICEBP),
	[x86_intercept_hlt]		= POST_EX(SVM_EXIT_HLT),
	[x86_intercept_in]		= POST_EX(SVM_EXIT_IOIO),
	[x86_intercept_ins]		= POST_EX(SVM_EXIT_IOIO),
	[x86_intercept_out]		= POST_EX(SVM_EXIT_IOIO),
	[x86_intercept_outs]		= POST_EX(SVM_EXIT_IOIO),
};

#undef PRE_EX
#undef POST_EX
#undef POST_MEM
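/*
 * Called from the instruction emulator while emulating on behalf of a nested
 * guest: translate the emulator intercept into the SVM exit code the hardware
 * would have generated, reconstruct the relevant exit_info fields, and let
 * nested_svm_exit_handled() decide whether L1 wants a #VMEXIT for it.
 */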
static int svm_check_intercept(struct kvm_vcpu *vcpu,
			       struct x86_instruction_info *info,
			       enum x86_intercept_stage stage)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int vmexit, ret = X86EMUL_CONTINUE;
	struct __x86_intercept icpt_info;
	struct vmcb *vmcb = svm->vmcb;

	if (info->intercept >= ARRAY_SIZE(x86_intercept_map))
		goto out;

	icpt_info = x86_intercept_map[info->intercept];

	if (stage != icpt_info.stage)
		goto out;

	switch (icpt_info.exit_code) {
	case SVM_EXIT_READ_CR0:
		if (info->intercept == x86_intercept_cr_read)
			icpt_info.exit_code += info->modrm_reg;
		break;
	case SVM_EXIT_WRITE_CR0: {
		unsigned long cr0, val;
		u64 intercept;

		if (info->intercept == x86_intercept_cr_write)
			icpt_info.exit_code += info->modrm_reg;

		if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 ||
		    info->intercept == x86_intercept_clts)
			break;

		intercept = svm->nested.intercept;

		if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
			break;

		cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
		val = info->src_val  & ~SVM_CR0_SELECTIVE_MASK;

		if (info->intercept == x86_intercept_lmsw) {
			cr0 &= 0xfUL;
			val &= 0xfUL;
			/* lmsw can't clear PE - catch this here */
			if (cr0 & X86_CR0_PE)
				val |= X86_CR0_PE;
		}

		if (cr0 ^ val)
			icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;

		break;
	}
	case SVM_EXIT_READ_DR0:
	case SVM_EXIT_WRITE_DR0:
		icpt_info.exit_code += info->modrm_reg;
		break;
	case SVM_EXIT_MSR:
		if (info->intercept == x86_intercept_wrmsr)
			vmcb->control.exit_info_1 = 1;
		else
			vmcb->control.exit_info_1 = 0;
		break;
	case SVM_EXIT_PAUSE:
		/*
		 * We only get this for NOP; PAUSE is encoded as
		 * REP NOP, so check for the REP prefix here.
		 */
		if (info->rep_prefix != REPE_PREFIX)
			goto out;
		/* fall through */
	case SVM_EXIT_IOIO: {
		u64 exit_info;
		u32 bytes;

		if (info->intercept == x86_intercept_in ||
		    info->intercept == x86_intercept_ins) {
			exit_info = ((info->src_val & 0xffff) << 16) |
				SVM_IOIO_TYPE_MASK;
			bytes = info->dst_bytes;
		} else {
			exit_info = (info->dst_val & 0xffff) << 16;
			bytes = info->src_bytes;
		}

		if (info->intercept == x86_intercept_outs ||
		    info->intercept == x86_intercept_ins)
			exit_info |= SVM_IOIO_STR_MASK;

		if (info->rep_prefix)
			exit_info |= SVM_IOIO_REP_MASK;

		bytes = min(bytes, 4u);

		exit_info |= bytes << SVM_IOIO_SIZE_SHIFT;

		exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1);

		vmcb->control.exit_info_1 = exit_info;
		vmcb->control.exit_info_2 = info->next_rip;

		break;
	}
	default:
		break;
	}

	/* TODO: Advertise NRIPS to guest hypervisor unconditionally */
	if (static_cpu_has(X86_FEATURE_NRIPS))
		vmcb->control.next_rip = info->next_rip;
	vmcb->control.exit_code = icpt_info.exit_code;
	vmexit = nested_svm_exit_handled(svm);

	ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
					   : X86EMUL_CONTINUE;

out:
	return ret;
}

static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
{
	local_irq_enable();
}

static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
}
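/*
 * Table of SVM-specific callbacks handed to the generic KVM x86 code;
 * svm_init() below passes it to kvm_init() when the module is loaded.
 */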
static struct kvm_x86_ops svm_x86_ops = {
	.cpu_has_kvm_support = has_svm,
	.disabled_by_bios = is_disabled,
	.hardware_setup = svm_hardware_setup,
	.hardware_unsetup = svm_hardware_unsetup,
	.check_processor_compatibility = svm_check_processor_compat,
	.hardware_enable = svm_hardware_enable,
	.hardware_disable = svm_hardware_disable,
	.cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,

	.vcpu_create = svm_create_vcpu,
	.vcpu_free = svm_free_vcpu,
	.vcpu_reset = svm_vcpu_reset,

	.prepare_guest_switch = svm_prepare_guest_switch,
	.vcpu_load = svm_vcpu_load,
	.vcpu_put = svm_vcpu_put,

	.update_db_bp_intercept = update_bp_intercept,
	.get_msr = svm_get_msr,
	.set_msr = svm_set_msr,
	.get_segment_base = svm_get_segment_base,
	.get_segment = svm_get_segment,
	.set_segment = svm_set_segment,
	.get_cpl = svm_get_cpl,
	.get_cs_db_l_bits = kvm_get_cs_db_l_bits,
	.decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
	.decache_cr3 = svm_decache_cr3,
	.decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
	.set_cr0 = svm_set_cr0,
	.set_cr3 = svm_set_cr3,
	.set_cr4 = svm_set_cr4,
	.set_efer = svm_set_efer,
	.get_idt = svm_get_idt,
	.set_idt = svm_set_idt,
	.get_gdt = svm_get_gdt,
	.set_gdt = svm_set_gdt,
	.get_dr6 = svm_get_dr6,
	.set_dr6 = svm_set_dr6,
	.set_dr7 = svm_set_dr7,
	.sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
	.cache_reg = svm_cache_reg,
	.get_rflags = svm_get_rflags,
	.set_rflags = svm_set_rflags,
	.fpu_activate = svm_fpu_activate,
	.fpu_deactivate = svm_fpu_deactivate,

	.tlb_flush = svm_flush_tlb,

	.run = svm_vcpu_run,
	.handle_exit = handle_exit,
	.skip_emulated_instruction = skip_emulated_instruction,
	.set_interrupt_shadow = svm_set_interrupt_shadow,
	.get_interrupt_shadow = svm_get_interrupt_shadow,
	.patch_hypercall = svm_patch_hypercall,
	.set_irq = svm_set_irq,
	.set_nmi = svm_inject_nmi,
	.queue_exception = svm_queue_exception,
	.cancel_injection = svm_cancel_injection,
	.interrupt_allowed = svm_interrupt_allowed,
	.nmi_allowed = svm_nmi_allowed,
	.get_nmi_mask = svm_get_nmi_mask,
	.set_nmi_mask = svm_set_nmi_mask,
	.enable_nmi_window = enable_nmi_window,
	.enable_irq_window = enable_irq_window,
	.update_cr8_intercept = update_cr8_intercept,
	.set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
	.vm_has_apicv = svm_vm_has_apicv,
	.load_eoi_exitmap = svm_load_eoi_exitmap,
	.sync_pir_to_irr = svm_sync_pir_to_irr,

	.set_tss_addr = svm_set_tss_addr,
	.get_tdp_level = get_npt_level,
	.get_mt_mask = svm_get_mt_mask,

	.get_exit_info = svm_get_exit_info,

	.get_lpage_level = svm_get_lpage_level,

	.cpuid_update = svm_cpuid_update,

	.rdtscp_supported = svm_rdtscp_supported,
	.invpcid_supported = svm_invpcid_supported,
	.mpx_supported = svm_mpx_supported,
	.xsaves_supported = svm_xsaves_supported,

	.set_supported_cpuid = svm_set_supported_cpuid,

	.has_wbinvd_exit = svm_has_wbinvd_exit,

	.set_tsc_khz = svm_set_tsc_khz,
	.read_tsc_offset = svm_read_tsc_offset,
	.write_tsc_offset = svm_write_tsc_offset,
	.adjust_tsc_offset = svm_adjust_tsc_offset,
	.compute_tsc_offset = svm_compute_tsc_offset,
	.read_l1_tsc = svm_read_l1_tsc,

	.set_tdp_cr3 = set_tdp_cr3,

	.check_intercept = svm_check_intercept,
	.handle_external_intr = svm_handle_external_intr,

	.sched_in = svm_sched_in,
};

static int __init svm_init(void)
{
	return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
			__alignof__(struct vcpu_svm), THIS_MODULE);
}

static void __exit svm_exit(void)
{
	kvm_exit();
}

module_init(svm_init)
module_exit(svm_exit)