This source file includes the following definitions.
- to_kvm_svm
- svm_sev_enabled
- sev_guest
- sev_get_asid
- mark_all_dirty
- mark_all_clean
- mark_dirty
- to_svm
- avic_update_vapic_bar
- avic_vcpu_is_running
- recalc_intercepts
- get_host_vmcb
- set_cr_intercept
- clr_cr_intercept
- is_cr_intercept
- set_dr_intercepts
- clr_dr_intercepts
- set_exception_intercept
- clr_exception_intercept
- set_intercept
- clr_intercept
- vgif_enabled
- enable_gif
- disable_gif
- gif_set
- svm_msrpm_offset
- clgi
- stgi
- invlpga
- get_npt_level
- svm_set_efer
- is_external_interrupt
- svm_get_interrupt_shadow
- svm_set_interrupt_shadow
- skip_emulated_instruction
- svm_queue_exception
- svm_init_erratum_383
- svm_init_osvw
- has_svm
- svm_hardware_disable
- svm_hardware_enable
- svm_cpu_uninit
- svm_cpu_init
- valid_msr_intercept
- msr_write_intercepted
- set_msr_interception
- svm_vcpu_init_msrpm
- add_msr_offset
- init_msrpm_offsets
- svm_enable_lbrv
- svm_disable_lbrv
- disable_nmi_singlestep
- avic_ga_log_notifier
- sev_hardware_setup
- grow_ple_window
- shrink_ple_window
- svm_adjust_mmio_mask
- svm_hardware_setup
- svm_hardware_unsetup
- init_seg
- init_sys_seg
- svm_read_l1_tsc_offset
- svm_write_l1_tsc_offset
- avic_init_vmcb
- init_vmcb
- avic_get_physical_id_entry
- avic_init_access_page
- avic_init_backing_page
- __sev_asid_free
- sev_asid_free
- sev_unbind_asid
- sev_pin_memory
- sev_unpin_memory
- sev_clflush_pages
- __unregister_enc_region_locked
- svm_vm_alloc
- svm_vm_free
- sev_vm_destroy
- avic_vm_destroy
- svm_vm_destroy
- avic_vm_init
- avic_update_iommu_vcpu_affinity
- avic_vcpu_load
- avic_vcpu_put
- avic_set_running
- svm_vcpu_reset
- avic_init_vcpu
- svm_create_vcpu
- svm_clear_current_vmcb
- svm_free_vcpu
- svm_vcpu_load
- svm_vcpu_put
- svm_vcpu_blocking
- svm_vcpu_unblocking
- svm_get_rflags
- svm_set_rflags
- svm_cache_reg
- svm_set_vintr
- svm_clear_vintr
- svm_seg
- svm_get_segment_base
- svm_get_segment
- svm_get_cpl
- svm_get_idt
- svm_set_idt
- svm_get_gdt
- svm_set_gdt
- svm_decache_cr0_guest_bits
- svm_decache_cr3
- svm_decache_cr4_guest_bits
- update_cr0_intercept
- svm_set_cr0
- svm_set_cr4
- svm_set_segment
- update_bp_intercept
- new_asid
- svm_get_dr6
- svm_set_dr6
- svm_sync_dirty_debug_regs
- svm_set_dr7
- pf_interception
- npf_interception
- db_interception
- bp_interception
- ud_interception
- ac_interception
- gp_interception
- is_erratum_383
- svm_handle_mce
- mc_interception
- shutdown_interception
- io_interception
- nmi_interception
- intr_interception
- nop_on_interception
- halt_interception
- vmmcall_interception
- nested_svm_get_tdp_cr3
- nested_svm_get_tdp_pdptr
- nested_svm_set_tdp_cr3
- nested_svm_inject_npf_exit
- nested_svm_init_mmu_context
- nested_svm_uninit_mmu_context
- nested_svm_check_permissions
- nested_svm_check_exception
- nested_svm_intr
- nested_svm_nmi
- nested_svm_intercept_ioio
- nested_svm_exit_handled_msr
- nested_svm_intercept_db
- nested_svm_exit_special
- nested_svm_intercept
- nested_svm_exit_handled
- copy_vmcb_control_area
- nested_svm_vmexit
- nested_svm_vmrun_msrpm
- nested_vmcb_checks
- enter_svm_guest_mode
- nested_svm_vmrun
- nested_svm_vmloadsave
- vmload_interception
- vmsave_interception
- vmrun_interception
- stgi_interception
- clgi_interception
- invlpga_interception
- skinit_interception
- wbinvd_interception
- xsetbv_interception
- rdpru_interception
- task_switch_interception
- cpuid_interception
- iret_interception
- invlpg_interception
- emulate_on_interception
- rsm_interception
- rdpmc_interception
- check_selective_cr0_intercepted
- cr_interception
- dr_interception
- cr8_write_interception
- svm_get_msr_feature
- svm_get_msr
- rdmsr_interception
- svm_set_vm_cr
- svm_set_msr
- wrmsr_interception
- msr_interception
- interrupt_window_interception
- pause_interception
- nop_interception
- monitor_interception
- mwait_interception
- avic_incomplete_ipi_interception
- avic_get_logical_id_entry
- avic_ldr_write
- avic_invalidate_logical_id_entry
- avic_handle_ldr_update
- avic_handle_apic_id_update
- avic_handle_dfr_update
- avic_unaccel_trap_write
- is_avic_unaccelerated_access_trap
- avic_unaccelerated_access_interception
- dump_vmcb
- svm_get_exit_info
- handle_exit
- reload_tss
- pre_sev_run
- pre_svm_run
- svm_inject_nmi
- svm_inject_irq
- svm_set_irq
- svm_nested_virtualize_tpr
- update_cr8_intercept
- svm_set_virtual_apic_mode
- svm_get_enable_apicv
- svm_hwapic_irr_update
- svm_hwapic_isr_update
- svm_refresh_apicv_exec_ctrl
- svm_load_eoi_exitmap
- svm_deliver_avic_intr
- svm_dy_apicv_has_pending_interrupt
- svm_ir_list_del
- svm_ir_list_add
- get_pi_vcpu_info
- svm_update_pi_irte
- svm_nmi_allowed
- svm_get_nmi_mask
- svm_set_nmi_mask
- svm_interrupt_allowed
- enable_irq_window
- enable_nmi_window
- svm_set_tss_addr
- svm_set_identity_map_addr
- svm_flush_tlb
- svm_flush_tlb_gva
- svm_prepare_guest_switch
- sync_cr8_to_lapic
- sync_lapic_to_cr8
- svm_complete_interrupts
- svm_cancel_injection
- svm_vcpu_run
- svm_set_cr3
- set_tdp_cr3
- is_disabled
- svm_patch_hypercall
- svm_check_processor_compat
- svm_cpu_has_accelerated_tpr
- svm_has_emulated_msr
- svm_get_mt_mask
- svm_cpuid_update
- svm_set_supported_cpuid
- svm_get_lpage_level
- svm_rdtscp_supported
- svm_invpcid_supported
- svm_mpx_supported
- svm_xsaves_supported
- svm_umip_emulated
- svm_pt_supported
- svm_has_wbinvd_exit
- svm_pku_supported
- svm_check_intercept
- svm_handle_exit_irqoff
- svm_sched_in
- avic_post_state_restore
- svm_setup_mce
- svm_smi_allowed
- svm_pre_enter_smm
- svm_pre_leave_smm
- enable_smi_window
- sev_asid_new
- sev_guest_init
- sev_bind_asid
- __sev_issue_cmd
- sev_issue_cmd
- sev_launch_start
- get_num_contig_pages
- sev_launch_update_data
- sev_launch_measure
- sev_launch_finish
- sev_guest_status
- __sev_issue_dbg_cmd
- __sev_dbg_decrypt
- __sev_dbg_decrypt_user
- __sev_dbg_encrypt_user
- sev_dbg_crypt
- sev_launch_secret
- svm_mem_enc_op
- svm_register_enc_region
- find_enc_region
- svm_unregister_enc_region
- svm_need_emulation_on_page_fault
- svm_apic_init_signal_blocked
- svm_init
- svm_exit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 #define pr_fmt(fmt) "SVM: " fmt
16
17 #include <linux/kvm_host.h>
18
19 #include "irq.h"
20 #include "mmu.h"
21 #include "kvm_cache_regs.h"
22 #include "x86.h"
23 #include "cpuid.h"
24 #include "pmu.h"
25
26 #include <linux/module.h>
27 #include <linux/mod_devicetable.h>
28 #include <linux/kernel.h>
29 #include <linux/vmalloc.h>
30 #include <linux/highmem.h>
31 #include <linux/sched.h>
32 #include <linux/trace_events.h>
33 #include <linux/slab.h>
34 #include <linux/amd-iommu.h>
35 #include <linux/hashtable.h>
36 #include <linux/frame.h>
37 #include <linux/psp-sev.h>
38 #include <linux/file.h>
39 #include <linux/pagemap.h>
40 #include <linux/swap.h>
41
42 #include <asm/apic.h>
43 #include <asm/perf_event.h>
44 #include <asm/tlbflush.h>
45 #include <asm/desc.h>
46 #include <asm/debugreg.h>
47 #include <asm/kvm_para.h>
48 #include <asm/irq_remapping.h>
49 #include <asm/spec-ctrl.h>
50
51 #include <asm/virtext.h>
52 #include "trace.h"
53
54 #define __ex(x) __kvm_handle_fault_on_reboot(x)
55
56 MODULE_AUTHOR("Qumranet");
57 MODULE_LICENSE("GPL");
58
59 static const struct x86_cpu_id svm_cpu_id[] = {
60 X86_FEATURE_MATCH(X86_FEATURE_SVM),
61 {}
62 };
63 MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
64
65 #define IOPM_ALLOC_ORDER 2
66 #define MSRPM_ALLOC_ORDER 1
67
68 #define SEG_TYPE_LDT 2
69 #define SEG_TYPE_BUSY_TSS16 3
70
71 #define SVM_FEATURE_LBRV (1 << 1)
72 #define SVM_FEATURE_SVML (1 << 2)
73 #define SVM_FEATURE_TSC_RATE (1 << 4)
74 #define SVM_FEATURE_VMCB_CLEAN (1 << 5)
75 #define SVM_FEATURE_FLUSH_ASID (1 << 6)
76 #define SVM_FEATURE_DECODE_ASSIST (1 << 7)
77 #define SVM_FEATURE_PAUSE_FILTER (1 << 10)
78
79 #define SVM_AVIC_DOORBELL 0xc001011b
80
81 #define NESTED_EXIT_HOST 0
82 #define NESTED_EXIT_DONE 1
83 #define NESTED_EXIT_CONTINUE 2
84
85 #define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
86
87 #define TSC_RATIO_RSVD 0xffffff0000000000ULL
88 #define TSC_RATIO_MIN 0x0000000000000001ULL
89 #define TSC_RATIO_MAX 0x000000ffffffffffULL
90
91 #define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
92
93
94
95
96
97 #define AVIC_MAX_PHYSICAL_ID_COUNT 255
98
99 #define AVIC_UNACCEL_ACCESS_WRITE_MASK 1
100 #define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0
101 #define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF
102
103
104 #define AVIC_VCPU_ID_BITS 8
105 #define AVIC_VCPU_ID_MASK ((1 << AVIC_VCPU_ID_BITS) - 1)
106
107 #define AVIC_VM_ID_BITS 24
108 #define AVIC_VM_ID_NR (1 << AVIC_VM_ID_BITS)
109 #define AVIC_VM_ID_MASK ((1 << AVIC_VM_ID_BITS) - 1)
110
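/*
 * A GA tag packs the 24-bit AVIC VM ID and the 8-bit vCPU ID into one
 * 32-bit value; avic_ga_log_notifier() decodes it to locate the vCPU
 * targeted by a guest interrupt logged by the IOMMU.
 */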
111 #define AVIC_GATAG(x, y) (((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
112 (y & AVIC_VCPU_ID_MASK))
113 #define AVIC_GATAG_TO_VMID(x) ((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
114 #define AVIC_GATAG_TO_VCPUID(x) (x & AVIC_VCPU_ID_MASK)
115
116 static bool erratum_383_found __read_mostly;
117
118 static const u32 host_save_user_msrs[] = {
119 #ifdef CONFIG_X86_64
120 MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
121 MSR_FS_BASE,
122 #endif
123 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
124 MSR_TSC_AUX,
125 };
126
127 #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
128
129 struct kvm_sev_info {
130 bool active;
131 unsigned int asid;
132 unsigned int handle;
133 int fd;
134 unsigned long pages_locked;
135 struct list_head regions_list;
136 };
137
138 struct kvm_svm {
139 struct kvm kvm;
140
141
142 u32 avic_vm_id;
143 struct page *avic_logical_id_table_page;
144 struct page *avic_physical_id_table_page;
145 struct hlist_node hnode;
146
147 struct kvm_sev_info sev_info;
148 };
149
150 struct kvm_vcpu;
151
152 struct nested_state {
153 struct vmcb *hsave;
154 u64 hsave_msr;
155 u64 vm_cr_msr;
156 u64 vmcb;
157
158
159 u32 *msrpm;
160
161
162 u64 vmcb_msrpm;
163 u64 vmcb_iopm;
164
165
166 bool exit_required;
167
168
169 u32 intercept_cr;
170 u32 intercept_dr;
171 u32 intercept_exceptions;
172 u64 intercept;
173
174
175 u64 nested_cr3;
176 };
177
178 #define MSRPM_OFFSETS 16
179 static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
180
181
182
183
184
185 static uint64_t osvw_len = 4, osvw_status;
186
187 struct vcpu_svm {
188 struct kvm_vcpu vcpu;
189 struct vmcb *vmcb;
190 unsigned long vmcb_pa;
191 struct svm_cpu_data *svm_data;
192 uint64_t asid_generation;
193 uint64_t sysenter_esp;
194 uint64_t sysenter_eip;
195 uint64_t tsc_aux;
196
197 u64 msr_decfg;
198
199 u64 next_rip;
200
201 u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
202 struct {
203 u16 fs;
204 u16 gs;
205 u16 ldt;
206 u64 gs_base;
207 } host;
208
209 u64 spec_ctrl;
210
211
212
213
214
215 u64 virt_spec_ctrl;
216
217 u32 *msrpm;
218
219 ulong nmi_iret_rip;
220
221 struct nested_state nested;
222
223 bool nmi_singlestep;
224 u64 nmi_singlestep_guest_rflags;
225
226 unsigned int3_injected;
227 unsigned long int3_rip;
228
229
230 bool nrips_enabled : 1;
231
232 u32 ldr_reg;
233 u32 dfr_reg;
234 struct page *avic_backing_page;
235 u64 *avic_physical_id_cache;
236 bool avic_is_running;
237
238
239
240
241
242
243
244 struct list_head ir_list;
245 spinlock_t ir_list_lock;
246
247
248 unsigned int last_cpu;
249 };
250
251
252
253
254 struct amd_svm_iommu_ir {
255 struct list_head node;
256 void *data;
257 };
258
259 #define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
260 #define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
261 #define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
262
263 #define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
264 #define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
265 #define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
266 #define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
267
268 static DEFINE_PER_CPU(u64, current_tsc_ratio);
269 #define TSC_RATIO_DEFAULT 0x0100000000ULL
270
271 #define MSR_INVALID 0xffffffffU
272
273 static const struct svm_direct_access_msrs {
274 u32 index;
275 bool always;
276 } direct_access_msrs[] = {
277 { .index = MSR_STAR, .always = true },
278 { .index = MSR_IA32_SYSENTER_CS, .always = true },
279 #ifdef CONFIG_X86_64
280 { .index = MSR_GS_BASE, .always = true },
281 { .index = MSR_FS_BASE, .always = true },
282 { .index = MSR_KERNEL_GS_BASE, .always = true },
283 { .index = MSR_LSTAR, .always = true },
284 { .index = MSR_CSTAR, .always = true },
285 { .index = MSR_SYSCALL_MASK, .always = true },
286 #endif
287 { .index = MSR_IA32_SPEC_CTRL, .always = false },
288 { .index = MSR_IA32_PRED_CMD, .always = false },
289 { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
290 { .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
291 { .index = MSR_IA32_LASTINTFROMIP, .always = false },
292 { .index = MSR_IA32_LASTINTTOIP, .always = false },
293 { .index = MSR_INVALID, .always = false },
294 };
295
296
297 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
298 static bool npt_enabled = true;
299 #else
300 static bool npt_enabled;
301 #endif
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333 static unsigned short pause_filter_thresh = KVM_DEFAULT_PLE_GAP;
334 module_param(pause_filter_thresh, ushort, 0444);
335
336 static unsigned short pause_filter_count = KVM_SVM_DEFAULT_PLE_WINDOW;
337 module_param(pause_filter_count, ushort, 0444);
338
339
340 static unsigned short pause_filter_count_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
341 module_param(pause_filter_count_grow, ushort, 0444);
342
343
344 static unsigned short pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
345 module_param(pause_filter_count_shrink, ushort, 0444);
346
347
348 static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX;
349 module_param(pause_filter_count_max, ushort, 0444);
350
351
352 static int npt = true;
353 module_param(npt, int, S_IRUGO);
354
355
356 static int nested = true;
357 module_param(nested, int, S_IRUGO);
358
359
360 static int avic;
361 #ifdef CONFIG_X86_LOCAL_APIC
362 module_param(avic, int, S_IRUGO);
363 #endif
364
365
366 static int nrips = true;
367 module_param(nrips, int, 0444);
368
369
370 static int vls = true;
371 module_param(vls, int, 0444);
372
373
374 static int vgif = true;
375 module_param(vgif, int, 0444);
376
377
378 static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
379 module_param(sev, int, 0444);
380
381 static bool __read_mostly dump_invalid_vmcb = 0;
382 module_param(dump_invalid_vmcb, bool, 0644);
383
384 static u8 rsm_ins_bytes[] = "\x0f\xaa";
385
386 static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
387 static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
388 static void svm_complete_interrupts(struct vcpu_svm *svm);
389
390 static int nested_svm_exit_handled(struct vcpu_svm *svm);
391 static int nested_svm_intercept(struct vcpu_svm *svm);
392 static int nested_svm_vmexit(struct vcpu_svm *svm);
393 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
394 bool has_error_code, u32 error_code);
395
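/*
 * VMCB clean bits: each value below names a chunk of VMCB state the CPU
 * may cache across VMRUNs.  mark_dirty() clears the matching clean bit
 * so hardware reloads that state; mark_all_clean() sets every bit except
 * those in VMCB_ALWAYS_DIRTY_MASK (VMCB_INTR and VMCB_CR2).
 */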
396 enum {
397 VMCB_INTERCEPTS,
398
399 VMCB_PERM_MAP,
400 VMCB_ASID,
401 VMCB_INTR,
402 VMCB_NPT,
403 VMCB_CR,
404 VMCB_DR,
405 VMCB_DT,
406 VMCB_SEG,
407 VMCB_CR2,
408 VMCB_LBR,
409 VMCB_AVIC,
410
411
412
413 VMCB_DIRTY_MAX,
414 };
415
416
417 #define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2))
418
419 #define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
420
421 static unsigned int max_sev_asid;
422 static unsigned int min_sev_asid;
423 static unsigned long *sev_asid_bitmap;
424 #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
425
426 struct enc_region {
427 struct list_head list;
428 unsigned long npages;
429 struct page **pages;
430 unsigned long uaddr;
431 unsigned long size;
432 };
433
434
435 static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
436 {
437 return container_of(kvm, struct kvm_svm, kvm);
438 }
439
440 static inline bool svm_sev_enabled(void)
441 {
442 return IS_ENABLED(CONFIG_KVM_AMD_SEV) ? max_sev_asid : 0;
443 }
444
445 static inline bool sev_guest(struct kvm *kvm)
446 {
447 #ifdef CONFIG_KVM_AMD_SEV
448 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
449
450 return sev->active;
451 #else
452 return false;
453 #endif
454 }
455
456 static inline int sev_get_asid(struct kvm *kvm)
457 {
458 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
459
460 return sev->asid;
461 }
462
463 static inline void mark_all_dirty(struct vmcb *vmcb)
464 {
465 vmcb->control.clean = 0;
466 }
467
468 static inline void mark_all_clean(struct vmcb *vmcb)
469 {
470 vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
471 & ~VMCB_ALWAYS_DIRTY_MASK;
472 }
473
474 static inline void mark_dirty(struct vmcb *vmcb, int bit)
475 {
476 vmcb->control.clean &= ~(1 << bit);
477 }
478
479 static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
480 {
481 return container_of(vcpu, struct vcpu_svm, vcpu);
482 }
483
484 static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
485 {
486 svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK;
487 mark_dirty(svm->vmcb, VMCB_AVIC);
488 }
489
490 static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)
491 {
492 struct vcpu_svm *svm = to_svm(vcpu);
493 u64 *entry = svm->avic_physical_id_cache;
494
495 if (!entry)
496 return false;
497
498 return (READ_ONCE(*entry) & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
499 }
500
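/*
 * While in guest mode (nested SVM) the active VMCB must intercept the
 * union of what KVM needs for itself (kept in nested.hsave) and what the
 * L1 hypervisor requested for its guest (the nested intercept fields),
 * so the set_ and clr_ intercept helpers below re-merge them here.
 */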
501 static void recalc_intercepts(struct vcpu_svm *svm)
502 {
503 struct vmcb_control_area *c, *h;
504 struct nested_state *g;
505
506 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
507
508 if (!is_guest_mode(&svm->vcpu))
509 return;
510
511 c = &svm->vmcb->control;
512 h = &svm->nested.hsave->control;
513 g = &svm->nested;
514
515 c->intercept_cr = h->intercept_cr | g->intercept_cr;
516 c->intercept_dr = h->intercept_dr | g->intercept_dr;
517 c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
518 c->intercept = h->intercept | g->intercept;
519 }
520
521 static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
522 {
523 if (is_guest_mode(&svm->vcpu))
524 return svm->nested.hsave;
525 else
526 return svm->vmcb;
527 }
528
529 static inline void set_cr_intercept(struct vcpu_svm *svm, int bit)
530 {
531 struct vmcb *vmcb = get_host_vmcb(svm);
532
533 vmcb->control.intercept_cr |= (1U << bit);
534
535 recalc_intercepts(svm);
536 }
537
538 static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit)
539 {
540 struct vmcb *vmcb = get_host_vmcb(svm);
541
542 vmcb->control.intercept_cr &= ~(1U << bit);
543
544 recalc_intercepts(svm);
545 }
546
547 static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit)
548 {
549 struct vmcb *vmcb = get_host_vmcb(svm);
550
551 return vmcb->control.intercept_cr & (1U << bit);
552 }
553
554 static inline void set_dr_intercepts(struct vcpu_svm *svm)
555 {
556 struct vmcb *vmcb = get_host_vmcb(svm);
557
558 vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ)
559 | (1 << INTERCEPT_DR1_READ)
560 | (1 << INTERCEPT_DR2_READ)
561 | (1 << INTERCEPT_DR3_READ)
562 | (1 << INTERCEPT_DR4_READ)
563 | (1 << INTERCEPT_DR5_READ)
564 | (1 << INTERCEPT_DR6_READ)
565 | (1 << INTERCEPT_DR7_READ)
566 | (1 << INTERCEPT_DR0_WRITE)
567 | (1 << INTERCEPT_DR1_WRITE)
568 | (1 << INTERCEPT_DR2_WRITE)
569 | (1 << INTERCEPT_DR3_WRITE)
570 | (1 << INTERCEPT_DR4_WRITE)
571 | (1 << INTERCEPT_DR5_WRITE)
572 | (1 << INTERCEPT_DR6_WRITE)
573 | (1 << INTERCEPT_DR7_WRITE);
574
575 recalc_intercepts(svm);
576 }
577
578 static inline void clr_dr_intercepts(struct vcpu_svm *svm)
579 {
580 struct vmcb *vmcb = get_host_vmcb(svm);
581
582 vmcb->control.intercept_dr = 0;
583
584 recalc_intercepts(svm);
585 }
586
587 static inline void set_exception_intercept(struct vcpu_svm *svm, int bit)
588 {
589 struct vmcb *vmcb = get_host_vmcb(svm);
590
591 vmcb->control.intercept_exceptions |= (1U << bit);
592
593 recalc_intercepts(svm);
594 }
595
596 static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit)
597 {
598 struct vmcb *vmcb = get_host_vmcb(svm);
599
600 vmcb->control.intercept_exceptions &= ~(1U << bit);
601
602 recalc_intercepts(svm);
603 }
604
605 static inline void set_intercept(struct vcpu_svm *svm, int bit)
606 {
607 struct vmcb *vmcb = get_host_vmcb(svm);
608
609 vmcb->control.intercept |= (1ULL << bit);
610
611 recalc_intercepts(svm);
612 }
613
614 static inline void clr_intercept(struct vcpu_svm *svm, int bit)
615 {
616 struct vmcb *vmcb = get_host_vmcb(svm);
617
618 vmcb->control.intercept &= ~(1ULL << bit);
619
620 recalc_intercepts(svm);
621 }
622
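/*
 * With the vGIF feature the global interrupt flag is kept in the VMCB
 * (V_GIF_MASK in int_ctl); without it, KVM tracks GIF in software via
 * HF_GIF_MASK in vcpu->arch.hflags.
 */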
623 static inline bool vgif_enabled(struct vcpu_svm *svm)
624 {
625 return !!(svm->vmcb->control.int_ctl & V_GIF_ENABLE_MASK);
626 }
627
628 static inline void enable_gif(struct vcpu_svm *svm)
629 {
630 if (vgif_enabled(svm))
631 svm->vmcb->control.int_ctl |= V_GIF_MASK;
632 else
633 svm->vcpu.arch.hflags |= HF_GIF_MASK;
634 }
635
636 static inline void disable_gif(struct vcpu_svm *svm)
637 {
638 if (vgif_enabled(svm))
639 svm->vmcb->control.int_ctl &= ~V_GIF_MASK;
640 else
641 svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
642 }
643
644 static inline bool gif_set(struct vcpu_svm *svm)
645 {
646 if (vgif_enabled(svm))
647 return !!(svm->vmcb->control.int_ctl & V_GIF_MASK);
648 else
649 return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
650 }
651
652 static unsigned long iopm_base;
653
654 struct kvm_ldttss_desc {
655 u16 limit0;
656 u16 base0;
657 unsigned base1:8, type:5, dpl:2, p:1;
658 unsigned limit1:4, zero0:3, g:1, base2:8;
659 u32 base3;
660 u32 zero1;
661 } __attribute__((packed));
662
663 struct svm_cpu_data {
664 int cpu;
665
666 u64 asid_generation;
667 u32 max_asid;
668 u32 next_asid;
669 u32 min_asid;
670 struct kvm_ldttss_desc *tss_desc;
671
672 struct page *save_area;
673 struct vmcb *current_vmcb;
674
675
676 struct vmcb **sev_vmcbs;
677 };
678
679 static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
680
681 static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
682
683 #define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
684 #define MSRS_RANGE_SIZE 2048
685 #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)
686
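/*
 * Each MSR takes two bits (read and write intercept) in the permission
 * map, i.e. four MSRs per byte.  Each msrpm_ranges[] entry above covers
 * MSRS_IN_RANGE MSRs in MSRS_RANGE_SIZE bytes; the offset returned here
 * is in u32 units, matching how set_msr_interception() indexes msrpm[].
 */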
687 static u32 svm_msrpm_offset(u32 msr)
688 {
689 u32 offset;
690 int i;
691
692 for (i = 0; i < NUM_MSR_MAPS; i++) {
693 if (msr < msrpm_ranges[i] ||
694 msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
695 continue;
696
697 offset = (msr - msrpm_ranges[i]) / 4;
698 offset += (i * MSRS_RANGE_SIZE);
699
700
701 return offset / 4;
702 }
703
704
705 return MSR_INVALID;
706 }
707
708 #define MAX_INST_SIZE 15
709
710 static inline void clgi(void)
711 {
712 asm volatile (__ex("clgi"));
713 }
714
715 static inline void stgi(void)
716 {
717 asm volatile (__ex("stgi"));
718 }
719
720 static inline void invlpga(unsigned long addr, u32 asid)
721 {
722 asm volatile (__ex("invlpga %1, %0") : : "c"(asid), "a"(addr));
723 }
724
725 static int get_npt_level(struct kvm_vcpu *vcpu)
726 {
727 #ifdef CONFIG_X86_64
728 return PT64_ROOT_4LEVEL;
729 #else
730 return PT32E_ROOT_LEVEL;
731 #endif
732 }
733
734 static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
735 {
736 vcpu->arch.efer = efer;
737
738 if (!npt_enabled) {
739
740 efer |= EFER_NX;
741
742 if (!(efer & EFER_LMA))
743 efer &= ~EFER_LME;
744 }
745
746 to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
747 mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
748 }
749
750 static int is_external_interrupt(u32 info)
751 {
752 info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
753 return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
754 }
755
756 static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu)
757 {
758 struct vcpu_svm *svm = to_svm(vcpu);
759 u32 ret = 0;
760
761 if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
762 ret = KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
763 return ret;
764 }
765
766 static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
767 {
768 struct vcpu_svm *svm = to_svm(vcpu);
769
770 if (mask == 0)
771 svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
772 else
773 svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
774
775 }
776
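/*
 * When the NRIPS feature is available the hardware reports the next RIP
 * in the VMCB, so an intercepted instruction can be skipped without
 * decoding it; otherwise fall back to the emulator with EMULTYPE_SKIP.
 */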
777 static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
778 {
779 struct vcpu_svm *svm = to_svm(vcpu);
780
781 if (nrips && svm->vmcb->control.next_rip != 0) {
782 WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
783 svm->next_rip = svm->vmcb->control.next_rip;
784 }
785
786 if (!svm->next_rip) {
787 if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
788 return 0;
789 } else {
790 if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
791 pr_err("%s: ip 0x%lx next 0x%llx\n",
792 __func__, kvm_rip_read(vcpu), svm->next_rip);
793 kvm_rip_write(vcpu, svm->next_rip);
794 }
795 svm_set_interrupt_shadow(vcpu, 0);
796
797 return 1;
798 }
799
800 static void svm_queue_exception(struct kvm_vcpu *vcpu)
801 {
802 struct vcpu_svm *svm = to_svm(vcpu);
803 unsigned nr = vcpu->arch.exception.nr;
804 bool has_error_code = vcpu->arch.exception.has_error_code;
805 bool reinject = vcpu->arch.exception.injected;
806 u32 error_code = vcpu->arch.exception.error_code;
807
808
809
810
811
812 if (!reinject &&
813 nested_svm_check_exception(svm, nr, has_error_code, error_code))
814 return;
815
816 kvm_deliver_exception_payload(&svm->vcpu);
817
818 if (nr == BP_VECTOR && !nrips) {
819 unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
820
821
822
823
824
825
826
827
828 (void)skip_emulated_instruction(&svm->vcpu);
829 rip = kvm_rip_read(&svm->vcpu);
830 svm->int3_rip = rip + svm->vmcb->save.cs.base;
831 svm->int3_injected = rip - old_rip;
832 }
833
834 svm->vmcb->control.event_inj = nr
835 | SVM_EVTINJ_VALID
836 | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
837 | SVM_EVTINJ_TYPE_EXEPT;
838 svm->vmcb->control.event_inj_err = error_code;
839 }
840
841 static void svm_init_erratum_383(void)
842 {
843 u32 low, high;
844 int err;
845 u64 val;
846
847 if (!static_cpu_has_bug(X86_BUG_AMD_TLB_MMATCH))
848 return;
849
850
851 val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
852 if (err)
853 return;
854
855 val |= (1ULL << 47);
856
857 low = lower_32_bits(val);
858 high = upper_32_bits(val);
859
860 native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);
861
862 erratum_383_found = true;
863 }
864
865 static void svm_init_osvw(struct kvm_vcpu *vcpu)
866 {
867
868
869
870
871 vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3;
872 vcpu->arch.osvw.status = osvw_status & ~(6ULL);
873
874
875
876
877
878
879
880
881
882 if (osvw_len == 0 && boot_cpu_data.x86 == 0x10)
883 vcpu->arch.osvw.status |= 1;
884 }
885
886 static int has_svm(void)
887 {
888 const char *msg;
889
890 if (!cpu_has_svm(&msg)) {
891 printk(KERN_INFO "has_svm: %s\n", msg);
892 return 0;
893 }
894
895 return 1;
896 }
897
898 static void svm_hardware_disable(void)
899 {
900
901 if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
902 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
903
904 cpu_svm_disable();
905
906 amd_pmu_disable_virt();
907 }
908
909 static int svm_hardware_enable(void)
910 {
911
912 struct svm_cpu_data *sd;
913 uint64_t efer;
914 struct desc_struct *gdt;
915 int me = raw_smp_processor_id();
916
917 rdmsrl(MSR_EFER, efer);
918 if (efer & EFER_SVME)
919 return -EBUSY;
920
921 if (!has_svm()) {
922 pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me);
923 return -EINVAL;
924 }
925 sd = per_cpu(svm_data, me);
926 if (!sd) {
927 pr_err("%s: svm_data is NULL on %d\n", __func__, me);
928 return -EINVAL;
929 }
930
931 sd->asid_generation = 1;
932 sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
933 sd->next_asid = sd->max_asid + 1;
934 sd->min_asid = max_sev_asid + 1;
935
936 gdt = get_current_gdt_rw();
937 sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
938
939 wrmsrl(MSR_EFER, efer | EFER_SVME);
940
941 wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
942
943 if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
944 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
945 __this_cpu_write(current_tsc_ratio, TSC_RATIO_DEFAULT);
946 }
947
948
949
950
951
952
953
954
955
956
957
958 if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
959 uint64_t len, status = 0;
960 int err;
961
962 len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err);
963 if (!err)
964 status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS,
965 &err);
966
967 if (err)
968 osvw_status = osvw_len = 0;
969 else {
970 if (len < osvw_len)
971 osvw_len = len;
972 osvw_status |= status;
973 osvw_status &= (1ULL << osvw_len) - 1;
974 }
975 } else
976 osvw_status = osvw_len = 0;
977
978 svm_init_erratum_383();
979
980 amd_pmu_enable_virt();
981
982 return 0;
983 }
984
985 static void svm_cpu_uninit(int cpu)
986 {
987 struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
988
989 if (!sd)
990 return;
991
992 per_cpu(svm_data, raw_smp_processor_id()) = NULL;
993 kfree(sd->sev_vmcbs);
994 __free_page(sd->save_area);
995 kfree(sd);
996 }
997
998 static int svm_cpu_init(int cpu)
999 {
1000 struct svm_cpu_data *sd;
1001
1002 sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
1003 if (!sd)
1004 return -ENOMEM;
1005 sd->cpu = cpu;
1006 sd->save_area = alloc_page(GFP_KERNEL);
1007 if (!sd->save_area)
1008 goto free_cpu_data;
1009
1010 if (svm_sev_enabled()) {
1011 sd->sev_vmcbs = kmalloc_array(max_sev_asid + 1,
1012 sizeof(void *),
1013 GFP_KERNEL);
1014 if (!sd->sev_vmcbs)
1015 goto free_save_area;
1016 }
1017
1018 per_cpu(svm_data, cpu) = sd;
1019
1020 return 0;
1021
1022 free_save_area:
1023 __free_page(sd->save_area);
1024 free_cpu_data:
1025 kfree(sd);
1026 return -ENOMEM;
1027
1028 }
1029
1030 static bool valid_msr_intercept(u32 index)
1031 {
1032 int i;
1033
1034 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
1035 if (direct_access_msrs[i].index == index)
1036 return true;
1037
1038 return false;
1039 }
1040
1041 static bool msr_write_intercepted(struct kvm_vcpu *vcpu, unsigned msr)
1042 {
1043 u8 bit_write;
1044 unsigned long tmp;
1045 u32 offset;
1046 u32 *msrpm;
1047
1048 msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm:
1049 to_svm(vcpu)->msrpm;
1050
1051 offset = svm_msrpm_offset(msr);
1052 bit_write = 2 * (msr & 0x0f) + 1;
1053 tmp = msrpm[offset];
1054
1055 BUG_ON(offset == MSR_INVALID);
1056
1057 return !!test_bit(bit_write, &tmp);
1058 }
1059
1060 static void set_msr_interception(u32 *msrpm, unsigned msr,
1061 int read, int write)
1062 {
1063 u8 bit_read, bit_write;
1064 unsigned long tmp;
1065 u32 offset;
1066
1067
1068
1069
1070
1071 WARN_ON(!valid_msr_intercept(msr));
1072
1073 offset = svm_msrpm_offset(msr);
1074 bit_read = 2 * (msr & 0x0f);
1075 bit_write = 2 * (msr & 0x0f) + 1;
1076 tmp = msrpm[offset];
1077
1078 BUG_ON(offset == MSR_INVALID);
1079
1080 read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp);
1081 write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
1082
1083 msrpm[offset] = tmp;
1084 }
1085
1086 static void svm_vcpu_init_msrpm(u32 *msrpm)
1087 {
1088 int i;
1089
1090 memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
1091
1092 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
1093 if (!direct_access_msrs[i].always)
1094 continue;
1095
1096 set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
1097 }
1098 }
1099
1100 static void add_msr_offset(u32 offset)
1101 {
1102 int i;
1103
1104 for (i = 0; i < MSRPM_OFFSETS; ++i) {
1105
1106
1107 if (msrpm_offsets[i] == offset)
1108 return;
1109
1110
1111 if (msrpm_offsets[i] != MSR_INVALID)
1112 continue;
1113
1114
1115 msrpm_offsets[i] = offset;
1116
1117 return;
1118 }
1119
1120
1121
1122
1123
1124 BUG();
1125 }
1126
1127 static void init_msrpm_offsets(void)
1128 {
1129 int i;
1130
1131 memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));
1132
1133 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
1134 u32 offset;
1135
1136 offset = svm_msrpm_offset(direct_access_msrs[i].index);
1137 BUG_ON(offset == MSR_INVALID);
1138
1139 add_msr_offset(offset);
1140 }
1141 }
1142
1143 static void svm_enable_lbrv(struct vcpu_svm *svm)
1144 {
1145 u32 *msrpm = svm->msrpm;
1146
1147 svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
1148 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
1149 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
1150 set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
1151 set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
1152 }
1153
1154 static void svm_disable_lbrv(struct vcpu_svm *svm)
1155 {
1156 u32 *msrpm = svm->msrpm;
1157
1158 svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
1159 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
1160 set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
1161 set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
1162 set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
1163 }
1164
1165 static void disable_nmi_singlestep(struct vcpu_svm *svm)
1166 {
1167 svm->nmi_singlestep = false;
1168
1169 if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
1170
1171 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
1172 svm->vmcb->save.rflags &= ~X86_EFLAGS_TF;
1173 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
1174 svm->vmcb->save.rflags &= ~X86_EFLAGS_RF;
1175 }
1176 }
1177
1178
1179
1180
1181
1182
1183 #define SVM_VM_DATA_HASH_BITS 8
1184 static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
1185 static u32 next_vm_id = 0;
1186 static bool next_vm_id_wrapped = 0;
1187 static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
1188
1189
1190
1191
1192
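/*
 * Registered with the IOMMU driver via amd_iommu_register_ga_log_notifier()
 * in svm_hardware_setup(): decode the GA tag, find the matching VM in
 * svm_vm_data_hash and wake the target vCPU so the logged guest
 * interrupt gets handled.
 */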
1193 static int avic_ga_log_notifier(u32 ga_tag)
1194 {
1195 unsigned long flags;
1196 struct kvm_svm *kvm_svm;
1197 struct kvm_vcpu *vcpu = NULL;
1198 u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
1199 u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);
1200
1201 pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
1202
1203 spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
1204 hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) {
1205 if (kvm_svm->avic_vm_id != vm_id)
1206 continue;
1207 vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id);
1208 break;
1209 }
1210 spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
1211
1212
1213
1214
1215
1216
1217 if (vcpu)
1218 kvm_vcpu_wake_up(vcpu);
1219
1220 return 0;
1221 }
1222
1223 static __init int sev_hardware_setup(void)
1224 {
1225 struct sev_user_data_status *status;
1226 int rc;
1227
1228
1229 max_sev_asid = cpuid_ecx(0x8000001F);
1230
1231 if (!max_sev_asid)
1232 return 1;
1233
1234
1235 min_sev_asid = cpuid_edx(0x8000001F);
1236
1237
1238 sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
1239 if (!sev_asid_bitmap)
1240 return 1;
1241
1242 status = kmalloc(sizeof(*status), GFP_KERNEL);
1243 if (!status)
1244 return 1;
1245
1246
1247
1248
1249
1250
1251
1252
1253 rc = sev_platform_status(status, NULL);
1254 if (rc)
1255 goto err;
1256
1257 pr_info("SEV supported\n");
1258
1259 err:
1260 kfree(status);
1261 return rc;
1262 }
1263
1264 static void grow_ple_window(struct kvm_vcpu *vcpu)
1265 {
1266 struct vcpu_svm *svm = to_svm(vcpu);
1267 struct vmcb_control_area *control = &svm->vmcb->control;
1268 int old = control->pause_filter_count;
1269
1270 control->pause_filter_count = __grow_ple_window(old,
1271 pause_filter_count,
1272 pause_filter_count_grow,
1273 pause_filter_count_max);
1274
1275 if (control->pause_filter_count != old) {
1276 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
1277 trace_kvm_ple_window_update(vcpu->vcpu_id,
1278 control->pause_filter_count, old);
1279 }
1280 }
1281
1282 static void shrink_ple_window(struct kvm_vcpu *vcpu)
1283 {
1284 struct vcpu_svm *svm = to_svm(vcpu);
1285 struct vmcb_control_area *control = &svm->vmcb->control;
1286 int old = control->pause_filter_count;
1287
1288 control->pause_filter_count =
1289 __shrink_ple_window(old,
1290 pause_filter_count,
1291 pause_filter_count_shrink,
1292 pause_filter_count);
1293 if (control->pause_filter_count != old) {
1294 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
1295 trace_kvm_ple_window_update(vcpu->vcpu_id,
1296 control->pause_filter_count, old);
1297 }
1298 }
1299
1300
1301
1302
1303
1304
1305
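/*
 * With SME/SEV the memory-encryption bit (CPUID 0x8000001f, EBX[5:0])
 * occupies one of the physical-address bits of a PTE.  If it collides
 * with the bit KVM would use for its MMIO SPTE mask, bump the mask up
 * one bit so encrypted mappings cannot be mistaken for MMIO.
 */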
1306 static __init void svm_adjust_mmio_mask(void)
1307 {
1308 unsigned int enc_bit, mask_bit;
1309 u64 msr, mask;
1310
1311
1312 if (cpuid_eax(0x80000000) < 0x8000001f)
1313 return;
1314
1315
1316 rdmsrl(MSR_K8_SYSCFG, msr);
1317 if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
1318 return;
1319
1320 enc_bit = cpuid_ebx(0x8000001f) & 0x3f;
1321 mask_bit = boot_cpu_data.x86_phys_bits;
1322
1323
1324 if (enc_bit == mask_bit)
1325 mask_bit++;
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336 mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0;
1337
1338 kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK);
1339 }
1340
1341 static __init int svm_hardware_setup(void)
1342 {
1343 int cpu;
1344 struct page *iopm_pages;
1345 void *iopm_va;
1346 int r;
1347
1348 iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
1349
1350 if (!iopm_pages)
1351 return -ENOMEM;
1352
1353 iopm_va = page_address(iopm_pages);
1354 memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
1355 iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
1356
1357 init_msrpm_offsets();
1358
1359 if (boot_cpu_has(X86_FEATURE_NX))
1360 kvm_enable_efer_bits(EFER_NX);
1361
1362 if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
1363 kvm_enable_efer_bits(EFER_FFXSR);
1364
1365 if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
1366 kvm_has_tsc_control = true;
1367 kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
1368 kvm_tsc_scaling_ratio_frac_bits = 32;
1369 }
1370
1371
1372 if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
1373 pause_filter_count = 0;
1374 pause_filter_thresh = 0;
1375 } else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) {
1376 pause_filter_thresh = 0;
1377 }
1378
1379 if (nested) {
1380 printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
1381 kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
1382 }
1383
1384 if (sev) {
1385 if (boot_cpu_has(X86_FEATURE_SEV) &&
1386 IS_ENABLED(CONFIG_KVM_AMD_SEV)) {
1387 r = sev_hardware_setup();
1388 if (r)
1389 sev = false;
1390 } else {
1391 sev = false;
1392 }
1393 }
1394
1395 svm_adjust_mmio_mask();
1396
1397 for_each_possible_cpu(cpu) {
1398 r = svm_cpu_init(cpu);
1399 if (r)
1400 goto err;
1401 }
1402
1403 if (!boot_cpu_has(X86_FEATURE_NPT))
1404 npt_enabled = false;
1405
1406 if (npt_enabled && !npt) {
1407 printk(KERN_INFO "kvm: Nested Paging disabled\n");
1408 npt_enabled = false;
1409 }
1410
1411 if (npt_enabled) {
1412 printk(KERN_INFO "kvm: Nested Paging enabled\n");
1413 kvm_enable_tdp();
1414 } else
1415 kvm_disable_tdp();
1416
1417 if (nrips) {
1418 if (!boot_cpu_has(X86_FEATURE_NRIPS))
1419 nrips = false;
1420 }
1421
1422 if (avic) {
1423 if (!npt_enabled ||
1424 !boot_cpu_has(X86_FEATURE_AVIC) ||
1425 !IS_ENABLED(CONFIG_X86_LOCAL_APIC)) {
1426 avic = false;
1427 } else {
1428 pr_info("AVIC enabled\n");
1429
1430 amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
1431 }
1432 }
1433
1434 if (vls) {
1435 if (!npt_enabled ||
1436 !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) ||
1437 !IS_ENABLED(CONFIG_X86_64)) {
1438 vls = false;
1439 } else {
1440 pr_info("Virtual VMLOAD VMSAVE supported\n");
1441 }
1442 }
1443
1444 if (vgif) {
1445 if (!boot_cpu_has(X86_FEATURE_VGIF))
1446 vgif = false;
1447 else
1448 pr_info("Virtual GIF supported\n");
1449 }
1450
1451 return 0;
1452
1453 err:
1454 __free_pages(iopm_pages, IOPM_ALLOC_ORDER);
1455 iopm_base = 0;
1456 return r;
1457 }
1458
1459 static __exit void svm_hardware_unsetup(void)
1460 {
1461 int cpu;
1462
1463 if (svm_sev_enabled())
1464 bitmap_free(sev_asid_bitmap);
1465
1466 for_each_possible_cpu(cpu)
1467 svm_cpu_uninit(cpu);
1468
1469 __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
1470 iopm_base = 0;
1471 }
1472
1473 static void init_seg(struct vmcb_seg *seg)
1474 {
1475 seg->selector = 0;
1476 seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK |
1477 SVM_SELECTOR_WRITE_MASK;
1478 seg->limit = 0xffff;
1479 seg->base = 0;
1480 }
1481
1482 static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
1483 {
1484 seg->selector = 0;
1485 seg->attrib = SVM_SELECTOR_P_MASK | type;
1486 seg->limit = 0xffff;
1487 seg->base = 0;
1488 }
1489
1490 static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
1491 {
1492 struct vcpu_svm *svm = to_svm(vcpu);
1493
1494 if (is_guest_mode(vcpu))
1495 return svm->nested.hsave->control.tsc_offset;
1496
1497 return vcpu->arch.tsc_offset;
1498 }
1499
1500 static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
1501 {
1502 struct vcpu_svm *svm = to_svm(vcpu);
1503 u64 g_tsc_offset = 0;
1504
1505 if (is_guest_mode(vcpu)) {
1506
1507 g_tsc_offset = svm->vmcb->control.tsc_offset -
1508 svm->nested.hsave->control.tsc_offset;
1509 svm->nested.hsave->control.tsc_offset = offset;
1510 }
1511
1512 trace_kvm_write_tsc_offset(vcpu->vcpu_id,
1513 svm->vmcb->control.tsc_offset - g_tsc_offset,
1514 offset);
1515
1516 svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
1517
1518 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
1519 return svm->vmcb->control.tsc_offset;
1520 }
1521
1522 static void avic_init_vmcb(struct vcpu_svm *svm)
1523 {
1524 struct vmcb *vmcb = svm->vmcb;
1525 struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
1526 phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
1527 phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page));
1528 phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page));
1529
1530 vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
1531 vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
1532 vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
1533 vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;
1534 vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
1535 }
1536
1537 static void init_vmcb(struct vcpu_svm *svm)
1538 {
1539 struct vmcb_control_area *control = &svm->vmcb->control;
1540 struct vmcb_save_area *save = &svm->vmcb->save;
1541
1542 svm->vcpu.arch.hflags = 0;
1543
1544 set_cr_intercept(svm, INTERCEPT_CR0_READ);
1545 set_cr_intercept(svm, INTERCEPT_CR3_READ);
1546 set_cr_intercept(svm, INTERCEPT_CR4_READ);
1547 set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
1548 set_cr_intercept(svm, INTERCEPT_CR3_WRITE);
1549 set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
1550 if (!kvm_vcpu_apicv_active(&svm->vcpu))
1551 set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
1552
1553 set_dr_intercepts(svm);
1554
1555 set_exception_intercept(svm, PF_VECTOR);
1556 set_exception_intercept(svm, UD_VECTOR);
1557 set_exception_intercept(svm, MC_VECTOR);
1558 set_exception_intercept(svm, AC_VECTOR);
1559 set_exception_intercept(svm, DB_VECTOR);
1560
1561
1562
1563
1564
1565
1566 if (enable_vmware_backdoor)
1567 set_exception_intercept(svm, GP_VECTOR);
1568
1569 set_intercept(svm, INTERCEPT_INTR);
1570 set_intercept(svm, INTERCEPT_NMI);
1571 set_intercept(svm, INTERCEPT_SMI);
1572 set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
1573 set_intercept(svm, INTERCEPT_RDPMC);
1574 set_intercept(svm, INTERCEPT_CPUID);
1575 set_intercept(svm, INTERCEPT_INVD);
1576 set_intercept(svm, INTERCEPT_INVLPG);
1577 set_intercept(svm, INTERCEPT_INVLPGA);
1578 set_intercept(svm, INTERCEPT_IOIO_PROT);
1579 set_intercept(svm, INTERCEPT_MSR_PROT);
1580 set_intercept(svm, INTERCEPT_TASK_SWITCH);
1581 set_intercept(svm, INTERCEPT_SHUTDOWN);
1582 set_intercept(svm, INTERCEPT_VMRUN);
1583 set_intercept(svm, INTERCEPT_VMMCALL);
1584 set_intercept(svm, INTERCEPT_VMLOAD);
1585 set_intercept(svm, INTERCEPT_VMSAVE);
1586 set_intercept(svm, INTERCEPT_STGI);
1587 set_intercept(svm, INTERCEPT_CLGI);
1588 set_intercept(svm, INTERCEPT_SKINIT);
1589 set_intercept(svm, INTERCEPT_WBINVD);
1590 set_intercept(svm, INTERCEPT_XSETBV);
1591 set_intercept(svm, INTERCEPT_RDPRU);
1592 set_intercept(svm, INTERCEPT_RSM);
1593
1594 if (!kvm_mwait_in_guest(svm->vcpu.kvm)) {
1595 set_intercept(svm, INTERCEPT_MONITOR);
1596 set_intercept(svm, INTERCEPT_MWAIT);
1597 }
1598
1599 if (!kvm_hlt_in_guest(svm->vcpu.kvm))
1600 set_intercept(svm, INTERCEPT_HLT);
1601
1602 control->iopm_base_pa = __sme_set(iopm_base);
1603 control->msrpm_base_pa = __sme_set(__pa(svm->msrpm));
1604 control->int_ctl = V_INTR_MASKING_MASK;
1605
1606 init_seg(&save->es);
1607 init_seg(&save->ss);
1608 init_seg(&save->ds);
1609 init_seg(&save->fs);
1610 init_seg(&save->gs);
1611
1612 save->cs.selector = 0xf000;
1613 save->cs.base = 0xffff0000;
1614
1615 save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
1616 SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
1617 save->cs.limit = 0xffff;
1618
1619 save->gdtr.limit = 0xffff;
1620 save->idtr.limit = 0xffff;
1621
1622 init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
1623 init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
1624
1625 svm_set_efer(&svm->vcpu, 0);
1626 save->dr6 = 0xffff0ff0;
1627 kvm_set_rflags(&svm->vcpu, 2);
1628 save->rip = 0x0000fff0;
1629 svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
1630
1631
1632
1633
1634
1635 svm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
1636 kvm_mmu_reset_context(&svm->vcpu);
1637
1638 save->cr4 = X86_CR4_PAE;
1639
1640
1641 if (npt_enabled) {
1642
1643 control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE;
1644 clr_intercept(svm, INTERCEPT_INVLPG);
1645 clr_exception_intercept(svm, PF_VECTOR);
1646 clr_cr_intercept(svm, INTERCEPT_CR3_READ);
1647 clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
1648 save->g_pat = svm->vcpu.arch.pat;
1649 save->cr3 = 0;
1650 save->cr4 = 0;
1651 }
1652 svm->asid_generation = 0;
1653
1654 svm->nested.vmcb = 0;
1655 svm->vcpu.arch.hflags = 0;
1656
1657 if (pause_filter_count) {
1658 control->pause_filter_count = pause_filter_count;
1659 if (pause_filter_thresh)
1660 control->pause_filter_thresh = pause_filter_thresh;
1661 set_intercept(svm, INTERCEPT_PAUSE);
1662 } else {
1663 clr_intercept(svm, INTERCEPT_PAUSE);
1664 }
1665
1666 if (kvm_vcpu_apicv_active(&svm->vcpu))
1667 avic_init_vmcb(svm);
1668
1669
1670
1671
1672
1673 if (vls) {
1674 clr_intercept(svm, INTERCEPT_VMLOAD);
1675 clr_intercept(svm, INTERCEPT_VMSAVE);
1676 svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
1677 }
1678
1679 if (vgif) {
1680 clr_intercept(svm, INTERCEPT_STGI);
1681 clr_intercept(svm, INTERCEPT_CLGI);
1682 svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK;
1683 }
1684
1685 if (sev_guest(svm->vcpu.kvm)) {
1686 svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
1687 clr_exception_intercept(svm, UD_VECTOR);
1688 }
1689
1690 mark_all_dirty(svm->vmcb);
1691
1692 enable_gif(svm);
1693
1694 }
1695
1696 static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
1697 unsigned int index)
1698 {
1699 u64 *avic_physical_id_table;
1700 struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
1701
1702 if (index >= AVIC_MAX_PHYSICAL_ID_COUNT)
1703 return NULL;
1704
1705 avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);
1706
1707 return &avic_physical_id_table[index];
1708 }
1709
1710
1711
1712
1713
1714
1715
1716
1717
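/*
 * Set up the APIC-access memslot (APIC_ACCESS_PAGE_PRIVATE_MEMSLOT at
 * APIC_DEFAULT_PHYS_BASE) once per VM; apic_access_page_done makes this
 * a one-shot operation under kvm->slots_lock.
 */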
1718 static int avic_init_access_page(struct kvm_vcpu *vcpu)
1719 {
1720 struct kvm *kvm = vcpu->kvm;
1721 int ret = 0;
1722
1723 mutex_lock(&kvm->slots_lock);
1724 if (kvm->arch.apic_access_page_done)
1725 goto out;
1726
1727 ret = __x86_set_memory_region(kvm,
1728 APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
1729 APIC_DEFAULT_PHYS_BASE,
1730 PAGE_SIZE);
1731 if (ret)
1732 goto out;
1733
1734 kvm->arch.apic_access_page_done = true;
1735 out:
1736 mutex_unlock(&kvm->slots_lock);
1737 return ret;
1738 }
1739
1740 static int avic_init_backing_page(struct kvm_vcpu *vcpu)
1741 {
1742 int ret;
1743 u64 *entry, new_entry;
1744 int id = vcpu->vcpu_id;
1745 struct vcpu_svm *svm = to_svm(vcpu);
1746
1747 ret = avic_init_access_page(vcpu);
1748 if (ret)
1749 return ret;
1750
1751 if (id >= AVIC_MAX_PHYSICAL_ID_COUNT)
1752 return -EINVAL;
1753
1754 if (!svm->vcpu.arch.apic->regs)
1755 return -EINVAL;
1756
1757 svm->avic_backing_page = virt_to_page(svm->vcpu.arch.apic->regs);
1758
1759
1760 entry = avic_get_physical_id_entry(vcpu, id);
1761 if (!entry)
1762 return -EINVAL;
1763
1764 new_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
1765 AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
1766 AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
1767 WRITE_ONCE(*entry, new_entry);
1768
1769 svm->avic_physical_id_cache = entry;
1770
1771 return 0;
1772 }
1773
1774 static void __sev_asid_free(int asid)
1775 {
1776 struct svm_cpu_data *sd;
1777 int cpu, pos;
1778
1779 pos = asid - 1;
1780 clear_bit(pos, sev_asid_bitmap);
1781
1782 for_each_possible_cpu(cpu) {
1783 sd = per_cpu(svm_data, cpu);
1784 sd->sev_vmcbs[pos] = NULL;
1785 }
1786 }
1787
1788 static void sev_asid_free(struct kvm *kvm)
1789 {
1790 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1791
1792 __sev_asid_free(sev->asid);
1793 }
1794
1795 static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
1796 {
1797 struct sev_data_decommission *decommission;
1798 struct sev_data_deactivate *data;
1799
1800 if (!handle)
1801 return;
1802
1803 data = kzalloc(sizeof(*data), GFP_KERNEL);
1804 if (!data)
1805 return;
1806
1807
1808 data->handle = handle;
1809 sev_guest_deactivate(data, NULL);
1810
1811 wbinvd_on_all_cpus();
1812 sev_guest_df_flush(NULL);
1813 kfree(data);
1814
1815 decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
1816 if (!decommission)
1817 return;
1818
1819
1820 decommission->handle = handle;
1821 sev_guest_decommission(decommission, NULL);
1822
1823 kfree(decommission);
1824 }
1825
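/*
 * Pin a guest memory range for an SEV command: charge the pages against
 * RLIMIT_MEMLOCK (tracked in sev->pages_locked), pin them with
 * get_user_pages_fast(), and return the page array.  Callers undo this
 * with sev_unpin_memory().
 */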
1826 static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
1827 unsigned long ulen, unsigned long *n,
1828 int write)
1829 {
1830 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1831 unsigned long npages, npinned, size;
1832 unsigned long locked, lock_limit;
1833 struct page **pages;
1834 unsigned long first, last;
1835
1836 if (ulen == 0 || uaddr + ulen < uaddr)
1837 return NULL;
1838
1839
1840 first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
1841 last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
1842 npages = (last - first + 1);
1843
1844 locked = sev->pages_locked + npages;
1845 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
1846 if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
1847 pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit);
1848 return NULL;
1849 }
1850
1851
1852 size = npages * sizeof(struct page *);
1853 if (size > PAGE_SIZE)
1854 pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO,
1855 PAGE_KERNEL);
1856 else
1857 pages = kmalloc(size, GFP_KERNEL_ACCOUNT);
1858
1859 if (!pages)
1860 return NULL;
1861
1862
1863 npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
1864 if (npinned != npages) {
1865 pr_err("SEV: Failure locking %lu pages.\n", npages);
1866 goto err;
1867 }
1868
1869 *n = npages;
1870 sev->pages_locked = locked;
1871
1872 return pages;
1873
1874 err:
1875 if (npinned > 0)
1876 release_pages(pages, npinned);
1877
1878 kvfree(pages);
1879 return NULL;
1880 }
1881
1882 static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
1883 unsigned long npages)
1884 {
1885 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1886
1887 release_pages(pages, npages);
1888 kvfree(pages);
1889 sev->pages_locked -= npages;
1890 }
1891
1892 static void sev_clflush_pages(struct page *pages[], unsigned long npages)
1893 {
1894 uint8_t *page_virtual;
1895 unsigned long i;
1896
1897 if (npages == 0 || pages == NULL)
1898 return;
1899
1900 for (i = 0; i < npages; i++) {
1901 page_virtual = kmap_atomic(pages[i]);
1902 clflush_cache_range(page_virtual, PAGE_SIZE);
1903 kunmap_atomic(page_virtual);
1904 }
1905 }
1906
1907 static void __unregister_enc_region_locked(struct kvm *kvm,
1908 struct enc_region *region)
1909 {
1910
1911
1912
1913
1914
1915
1916 sev_clflush_pages(region->pages, region->npages);
1917
1918 sev_unpin_memory(kvm, region->pages, region->npages);
1919 list_del(&region->list);
1920 kfree(region);
1921 }
1922
1923 static struct kvm *svm_vm_alloc(void)
1924 {
1925 struct kvm_svm *kvm_svm = __vmalloc(sizeof(struct kvm_svm),
1926 GFP_KERNEL_ACCOUNT | __GFP_ZERO,
1927 PAGE_KERNEL);
1928
1929 if (!kvm_svm)
1930 return NULL;
1931
1932 return &kvm_svm->kvm;
1933 }
1934
1935 static void svm_vm_free(struct kvm *kvm)
1936 {
1937 vfree(to_kvm_svm(kvm));
1938 }
1939
1940 static void sev_vm_destroy(struct kvm *kvm)
1941 {
1942 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
1943 struct list_head *head = &sev->regions_list;
1944 struct list_head *pos, *q;
1945
1946 if (!sev_guest(kvm))
1947 return;
1948
1949 mutex_lock(&kvm->lock);
1950
1951
1952
1953
1954
1955 if (!list_empty(head)) {
1956 list_for_each_safe(pos, q, head) {
1957 __unregister_enc_region_locked(kvm,
1958 list_entry(pos, struct enc_region, list));
1959 }
1960 }
1961
1962 mutex_unlock(&kvm->lock);
1963
1964 sev_unbind_asid(kvm, sev->handle);
1965 sev_asid_free(kvm);
1966 }
1967
1968 static void avic_vm_destroy(struct kvm *kvm)
1969 {
1970 unsigned long flags;
1971 struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
1972
1973 if (!avic)
1974 return;
1975
1976 if (kvm_svm->avic_logical_id_table_page)
1977 __free_page(kvm_svm->avic_logical_id_table_page);
1978 if (kvm_svm->avic_physical_id_table_page)
1979 __free_page(kvm_svm->avic_physical_id_table_page);
1980
1981 spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
1982 hash_del(&kvm_svm->hnode);
1983 spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
1984 }
1985
1986 static void svm_vm_destroy(struct kvm *kvm)
1987 {
1988 avic_vm_destroy(kvm);
1989 sev_vm_destroy(kvm);
1990 }
1991
1992 static int avic_vm_init(struct kvm *kvm)
1993 {
1994 unsigned long flags;
1995 int err = -ENOMEM;
1996 struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
1997 struct kvm_svm *k2;
1998 struct page *p_page;
1999 struct page *l_page;
2000 u32 vm_id;
2001
2002 if (!avic)
2003 return 0;
2004
2005
2006 p_page = alloc_page(GFP_KERNEL_ACCOUNT);
2007 if (!p_page)
2008 goto free_avic;
2009
2010 kvm_svm->avic_physical_id_table_page = p_page;
2011 clear_page(page_address(p_page));
2012
2013
2014 l_page = alloc_page(GFP_KERNEL_ACCOUNT);
2015 if (!l_page)
2016 goto free_avic;
2017
2018 kvm_svm->avic_logical_id_table_page = l_page;
2019 clear_page(page_address(l_page));
2020
2021 spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
2022 again:
2023 vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
2024 if (vm_id == 0) {
2025 next_vm_id_wrapped = 1;
2026 goto again;
2027 }
2028
2029 if (next_vm_id_wrapped) {
2030 hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) {
2031 if (k2->avic_vm_id == vm_id)
2032 goto again;
2033 }
2034 }
2035 kvm_svm->avic_vm_id = vm_id;
2036 hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id);
2037 spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
2038
2039 return 0;
2040
2041 free_avic:
2042 avic_vm_destroy(kvm);
2043 return err;
2044 }
2045
2046 static inline int
2047 avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
2048 {
2049 int ret = 0;
2050 unsigned long flags;
2051 struct amd_svm_iommu_ir *ir;
2052 struct vcpu_svm *svm = to_svm(vcpu);
2053
2054 if (!kvm_arch_has_assigned_device(vcpu->kvm))
2055 return 0;
2056
2057
2058
2059
2060
2061 spin_lock_irqsave(&svm->ir_list_lock, flags);
2062
2063 if (list_empty(&svm->ir_list))
2064 goto out;
2065
2066 list_for_each_entry(ir, &svm->ir_list, node) {
2067 ret = amd_iommu_update_ga(cpu, r, ir->data);
2068 if (ret)
2069 break;
2070 }
2071 out:
2072 spin_unlock_irqrestore(&svm->ir_list_lock, flags);
2073 return ret;
2074 }
2075
2076 static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2077 {
2078 u64 entry;
2079
2080 int h_physical_id = kvm_cpu_get_apicid(cpu);
2081 struct vcpu_svm *svm = to_svm(vcpu);
2082
2083 if (!kvm_vcpu_apicv_active(vcpu))
2084 return;
2085
2086
2087
2088
2089
2090 if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
2091 return;
2092
2093 entry = READ_ONCE(*(svm->avic_physical_id_cache));
2094 WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
2095
2096 entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
2097 entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
2098
2099 entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
2100 if (svm->avic_is_running)
2101 entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
2102
2103 WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
2104 avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
2105 svm->avic_is_running);
2106 }
2107
2108 static void avic_vcpu_put(struct kvm_vcpu *vcpu)
2109 {
2110 u64 entry;
2111 struct vcpu_svm *svm = to_svm(vcpu);
2112
2113 if (!kvm_vcpu_apicv_active(vcpu))
2114 return;
2115
2116 entry = READ_ONCE(*(svm->avic_physical_id_cache));
2117 if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
2118 avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
2119
2120 entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
2121 WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
2122 }
2123
2124
2125
2126
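/*
 * Track whether the vCPU is scheduled in: update avic_is_running and the
 * IsRunning bit via avic_vcpu_load()/avic_vcpu_put() so doorbell-based
 * interrupt delivery is only attempted while the vCPU is really running.
 */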
2127 static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
2128 {
2129 struct vcpu_svm *svm = to_svm(vcpu);
2130
2131 svm->avic_is_running = is_run;
2132 if (is_run)
2133 avic_vcpu_load(vcpu, vcpu->cpu);
2134 else
2135 avic_vcpu_put(vcpu);
2136 }
2137
2138 static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
2139 {
2140 struct vcpu_svm *svm = to_svm(vcpu);
2141 u32 dummy;
2142 u32 eax = 1;
2143
2144 vcpu->arch.microcode_version = 0x01000065;
2145 svm->spec_ctrl = 0;
2146 svm->virt_spec_ctrl = 0;
2147
2148 if (!init_event) {
2149 svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
2150 MSR_IA32_APICBASE_ENABLE;
2151 if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
2152 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
2153 }
2154 init_vmcb(svm);
2155
2156 kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true);
2157 kvm_rdx_write(vcpu, eax);
2158
2159 if (kvm_vcpu_apicv_active(vcpu) && !init_event)
2160 avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
2161 }
2162
2163 static int avic_init_vcpu(struct vcpu_svm *svm)
2164 {
2165 int ret;
2166
2167 if (!kvm_vcpu_apicv_active(&svm->vcpu))
2168 return 0;
2169
2170 ret = avic_init_backing_page(&svm->vcpu);
2171 if (ret)
2172 return ret;
2173
2174 INIT_LIST_HEAD(&svm->ir_list);
2175 spin_lock_init(&svm->ir_list_lock);
2176 svm->dfr_reg = APIC_DFR_FLAT;
2177
2178 return ret;
2179 }
2180
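/*
* Allocate and initialize a new vCPU: the vcpu_svm container, the host
* and guest FPU state, the VMCB, the MSR permission bitmaps (one for the
* host and one used while a nested guest runs), the nested hsave area
* and the AVIC backing page.
*/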
2181 static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
2182 {
2183 struct vcpu_svm *svm;
2184 struct page *page;
2185 struct page *msrpm_pages;
2186 struct page *hsave_page;
2187 struct page *nested_msrpm_pages;
2188 int err;
2189
2190 BUILD_BUG_ON_MSG(offsetof(struct vcpu_svm, vcpu) != 0,
2191 "struct kvm_vcpu must be at offset 0 for arch usercopy region");
2192
2193 svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
2194 if (!svm) {
2195 err = -ENOMEM;
2196 goto out;
2197 }
2198
2199 svm->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
2200 GFP_KERNEL_ACCOUNT);
2201 if (!svm->vcpu.arch.user_fpu) {
2202 printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n");
2203 err = -ENOMEM;
2204 goto free_partial_svm;
2205 }
2206
2207 svm->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
2208 GFP_KERNEL_ACCOUNT);
2209 if (!svm->vcpu.arch.guest_fpu) {
2210 printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
2211 err = -ENOMEM;
2212 goto free_user_fpu;
2213 }
2214
2215 err = kvm_vcpu_init(&svm->vcpu, kvm, id);
2216 if (err)
2217 goto free_svm;
2218
2219 err = -ENOMEM;
2220 page = alloc_page(GFP_KERNEL_ACCOUNT);
2221 if (!page)
2222 goto uninit;
2223
2224 msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER);
2225 if (!msrpm_pages)
2226 goto free_page1;
2227
2228 nested_msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER);
2229 if (!nested_msrpm_pages)
2230 goto free_page2;
2231
2232 hsave_page = alloc_page(GFP_KERNEL_ACCOUNT);
2233 if (!hsave_page)
2234 goto free_page3;
2235
2236 err = avic_init_vcpu(svm);
2237 if (err)
2238 goto free_page4;
2239
2240
2241 /* Start with avic_is_running set so the is-running bit gets
2242 * programmed the first time this vCPU is loaded. */
2243 svm->avic_is_running = true;
2244
2245 svm->nested.hsave = page_address(hsave_page);
2246
2247 svm->msrpm = page_address(msrpm_pages);
2248 svm_vcpu_init_msrpm(svm->msrpm);
2249
2250 svm->nested.msrpm = page_address(nested_msrpm_pages);
2251 svm_vcpu_init_msrpm(svm->nested.msrpm);
2252
2253 svm->vmcb = page_address(page);
2254 clear_page(svm->vmcb);
2255 svm->vmcb_pa = __sme_set(page_to_pfn(page) << PAGE_SHIFT);
2256 svm->asid_generation = 0;
2257 init_vmcb(svm);
2258
2259 svm_init_osvw(&svm->vcpu);
2260
2261 return &svm->vcpu;
2262
2263 free_page4:
2264 __free_page(hsave_page);
2265 free_page3:
2266 __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
2267 free_page2:
2268 __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
2269 free_page1:
2270 __free_page(page);
2271 uninit:
2272 kvm_vcpu_uninit(&svm->vcpu);
2273 free_svm:
2274 kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu);
2275 free_user_fpu:
2276 kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu);
2277 free_partial_svm:
2278 kmem_cache_free(kvm_vcpu_cache, svm);
2279 out:
2280 return ERR_PTR(err);
2281 }
2282
2283 static void svm_clear_current_vmcb(struct vmcb *vmcb)
2284 {
2285 int i;
2286
2287 for_each_online_cpu(i)
2288 cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL);
2289 }
2290
2291 static void svm_free_vcpu(struct kvm_vcpu *vcpu)
2292 {
2293 struct vcpu_svm *svm = to_svm(vcpu);
2294
2295 /*
2296 * The VMCB page could be freed, recycled and later matched as the
2297 * per-CPU current_vmcb, which would skip the IBPB in svm_vcpu_load().
2298 * Clear any stale current_vmcb pointers that still reference it.
2299 */
2300 svm_clear_current_vmcb(svm->vmcb);
2301
2302 __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
2303 __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
2304 __free_page(virt_to_page(svm->nested.hsave));
2305 __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
2306 kvm_vcpu_uninit(vcpu);
2307 kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu);
2308 kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu);
2309 kmem_cache_free(kvm_vcpu_cache, svm);
2310 }
2311
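/*
* Called when the vCPU is scheduled onto a physical CPU: save the host
* segment and MSR state, reprogram the TSC ratio and TSC_AUX when
* supported, issue an IBPB when switching to a different VMCB, and
* update the AVIC physical ID entry for the new CPU.
*/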
2312 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2313 {
2314 struct vcpu_svm *svm = to_svm(vcpu);
2315 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
2316 int i;
2317
2318 if (unlikely(cpu != vcpu->cpu)) {
2319 svm->asid_generation = 0;
2320 mark_all_dirty(svm->vmcb);
2321 }
2322
2323 #ifdef CONFIG_X86_64
2324 rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
2325 #endif
2326 savesegment(fs, svm->host.fs);
2327 savesegment(gs, svm->host.gs);
2328 svm->host.ldt = kvm_read_ldt();
2329
2330 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
2331 rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
2332
2333 if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
2334 u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio;
2335 if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) {
2336 __this_cpu_write(current_tsc_ratio, tsc_ratio);
2337 wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio);
2338 }
2339 }
2340
2341 if (static_cpu_has(X86_FEATURE_RDTSCP))
2342 wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
2343
2344 if (sd->current_vmcb != svm->vmcb) {
2345 sd->current_vmcb = svm->vmcb;
2346 indirect_branch_prediction_barrier();
2347 }
2348 avic_vcpu_load(vcpu, cpu);
2349 }
2350
2351 static void svm_vcpu_put(struct kvm_vcpu *vcpu)
2352 {
2353 struct vcpu_svm *svm = to_svm(vcpu);
2354 int i;
2355
2356 avic_vcpu_put(vcpu);
2357
2358 ++vcpu->stat.host_state_reload;
2359 kvm_load_ldt(svm->host.ldt);
2360 #ifdef CONFIG_X86_64
2361 loadsegment(fs, svm->host.fs);
2362 wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gsbase);
2363 load_gs_index(svm->host.gs);
2364 #else
2365 #ifdef CONFIG_X86_32_LAZY_GS
2366 loadsegment(gs, svm->host.gs);
2367 #endif
2368 #endif
2369 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
2370 wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
2371 }
2372
2373 static void svm_vcpu_blocking(struct kvm_vcpu *vcpu)
2374 {
2375 avic_set_running(vcpu, false);
2376 }
2377
2378 static void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
2379 {
2380 avic_set_running(vcpu, true);
2381 }
2382
2383 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
2384 {
2385 struct vcpu_svm *svm = to_svm(vcpu);
2386 unsigned long rflags = svm->vmcb->save.rflags;
2387
2388 if (svm->nmi_singlestep) {
2389 /* Hide TF and RF if they were only set for NMI single-stepping. */
2390 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF))
2391 rflags &= ~X86_EFLAGS_TF;
2392 if (!(svm->nmi_singlestep_guest_rflags & X86_EFLAGS_RF))
2393 rflags &= ~X86_EFLAGS_RF;
2394 }
2395 return rflags;
2396 }
2397
2398 static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
2399 {
2400 if (to_svm(vcpu)->nmi_singlestep)
2401 rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
2402
2403 /*
2404 * Any change of EFLAGS.VM is accompanied by a reload of SS (which may
2405 * change the CPL), so the CPL does not need to be recomputed here.
2406 */
2407
2408 to_svm(vcpu)->vmcb->save.rflags = rflags;
2409 }
2410
2411 static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
2412 {
2413 switch (reg) {
2414 case VCPU_EXREG_PDPTR:
2415 BUG_ON(!npt_enabled);
2416 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
2417 break;
2418 default:
2419 BUG();
2420 }
2421 }
2422
2423 static void svm_set_vintr(struct vcpu_svm *svm)
2424 {
2425 set_intercept(svm, INTERCEPT_VINTR);
2426 }
2427
2428 static void svm_clear_vintr(struct vcpu_svm *svm)
2429 {
2430 clr_intercept(svm, INTERCEPT_VINTR);
2431 }
2432
2433 static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
2434 {
2435 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
2436
2437 switch (seg) {
2438 case VCPU_SREG_CS: return &save->cs;
2439 case VCPU_SREG_DS: return &save->ds;
2440 case VCPU_SREG_ES: return &save->es;
2441 case VCPU_SREG_FS: return &save->fs;
2442 case VCPU_SREG_GS: return &save->gs;
2443 case VCPU_SREG_SS: return &save->ss;
2444 case VCPU_SREG_TR: return &save->tr;
2445 case VCPU_SREG_LDTR: return &save->ldtr;
2446 }
2447 BUG();
2448 return NULL;
2449 }
2450
2451 static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg)
2452 {
2453 struct vmcb_seg *s = svm_seg(vcpu, seg);
2454
2455 return s->base;
2456 }
2457
2458 static void svm_get_segment(struct kvm_vcpu *vcpu,
2459 struct kvm_segment *var, int seg)
2460 {
2461 struct vmcb_seg *s = svm_seg(vcpu, seg);
2462
2463 var->base = s->base;
2464 var->limit = s->limit;
2465 var->selector = s->selector;
2466 var->type = s->attrib & SVM_SELECTOR_TYPE_MASK;
2467 var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1;
2468 var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
2469 var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1;
2470 var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1;
2471 var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
2472 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
2473 /*
2474 * The SVM spec states that the G bit is not observed by the CPU and
2475 * some hypervisors drop it from the cached descriptor, so synthesize
2476 * a legal G bit from the limit. This helps nested KVM and
2477 * cross-vendor migration, since Intel's VM-entry checks include the
2478 * G bit.
2479 */
2480
2481
2482 var->g = s->limit > 0xfffff;
2483 /*
2484 * The SVM VMCB has no explicit "unusable" field, so derive it from
2485 * the present bit for cross-vendor migration purposes.
2486 */
2487
2488 var->unusable = !var->present;
2489
2490 switch (seg) {
2491 case VCPU_SREG_TR:
2492 /*
2493 * The busy bit of the TR type is not always reported in the cached
2494 * descriptor, so force it here.
2495 */
2496 var->type |= 0x2;
2497 break;
2498 case VCPU_SREG_DS:
2499 case VCPU_SREG_ES:
2500 case VCPU_SREG_FS:
2501 case VCPU_SREG_GS:
2502 /*
2503 * The accessed bit must always be set in the cached segment
2504 * descriptor, even though it can be cleared in the descriptor
2505 * itself. Intel checks this on VM-entry, so set it for usable
2506 * segments to support cross-vendor migration.
2507 */
2508
2509 if (!var->unusable)
2510 var->type |= 0x1;
2511 break;
2512 case VCPU_SREG_SS:
2513 /*
2514 * On AMD CPUs the DB bit may be left set in the descriptor even
2515 * though the whole segment is unusable. Clear it here to pass
2516 * Intel's VM-entry checks when cross-vendor migrating.
2517 */
2518
2519 if (var->unusable)
2520 var->db = 0;
2521
2522 var->dpl = to_svm(vcpu)->vmcb->save.cpl;
2523 break;
2524 }
2525 }
2526
2527 static int svm_get_cpl(struct kvm_vcpu *vcpu)
2528 {
2529 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
2530
2531 return save->cpl;
2532 }
2533
2534 static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
2535 {
2536 struct vcpu_svm *svm = to_svm(vcpu);
2537
2538 dt->size = svm->vmcb->save.idtr.limit;
2539 dt->address = svm->vmcb->save.idtr.base;
2540 }
2541
2542 static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
2543 {
2544 struct vcpu_svm *svm = to_svm(vcpu);
2545
2546 svm->vmcb->save.idtr.limit = dt->size;
2547 svm->vmcb->save.idtr.base = dt->address;
2548 mark_dirty(svm->vmcb, VMCB_DT);
2549 }
2550
2551 static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
2552 {
2553 struct vcpu_svm *svm = to_svm(vcpu);
2554
2555 dt->size = svm->vmcb->save.gdtr.limit;
2556 dt->address = svm->vmcb->save.gdtr.base;
2557 }
2558
2559 static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
2560 {
2561 struct vcpu_svm *svm = to_svm(vcpu);
2562
2563 svm->vmcb->save.gdtr.limit = dt->size;
2564 svm->vmcb->save.gdtr.base = dt->address;
2565 mark_dirty(svm->vmcb, VMCB_DT);
2566 }
2567
2568 static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
2569 {
2570 }
2571
2572 static void svm_decache_cr3(struct kvm_vcpu *vcpu)
2573 {
2574 }
2575
2576 static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
2577 {
2578 }
2579
2580 static void update_cr0_intercept(struct vcpu_svm *svm)
2581 {
2582 ulong gcr0 = svm->vcpu.arch.cr0;
2583 u64 *hcr0 = &svm->vmcb->save.cr0;
2584
2585 *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
2586 | (gcr0 & SVM_CR0_SELECTIVE_MASK);
2587
2588 mark_dirty(svm->vmcb, VMCB_CR);
2589
2590 if (gcr0 == *hcr0) {
2591 clr_cr_intercept(svm, INTERCEPT_CR0_READ);
2592 clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
2593 } else {
2594 set_cr_intercept(svm, INTERCEPT_CR0_READ);
2595 set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
2596 }
2597 }
2598
2599 static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
2600 {
2601 struct vcpu_svm *svm = to_svm(vcpu);
2602
2603 #ifdef CONFIG_X86_64
2604 if (vcpu->arch.efer & EFER_LME) {
2605 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
2606 vcpu->arch.efer |= EFER_LMA;
2607 svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
2608 }
2609
2610 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
2611 vcpu->arch.efer &= ~EFER_LMA;
2612 svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
2613 }
2614 }
2615 #endif
2616 vcpu->arch.cr0 = cr0;
2617
2618 if (!npt_enabled)
2619 cr0 |= X86_CR0_PG | X86_CR0_WP;
2620
2621 /*
2622 * With the KVM_X86_QUIRK_CD_NW_CLEARED quirk enabled, force caching
2623 * on (clear CD and NW) because some guest firmware never re-enables
2624 * it, which would otherwise slow the guest down noticeably.
2625 */
2626 if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
2627 cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
2628 svm->vmcb->save.cr0 = cr0;
2629 mark_dirty(svm->vmcb, VMCB_CR);
2630 update_cr0_intercept(svm);
2631 }
2632
2633 static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
2634 {
2635 unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
2636 unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
2637
2638 if (cr4 & X86_CR4_VMXE)
2639 return 1;
2640
2641 if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
2642 svm_flush_tlb(vcpu, true);
2643
2644 vcpu->arch.cr4 = cr4;
2645 if (!npt_enabled)
2646 cr4 |= X86_CR4_PAE;
2647 cr4 |= host_cr4_mce;
2648 to_svm(vcpu)->vmcb->save.cr4 = cr4;
2649 mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
2650 return 0;
2651 }
2652
2653 static void svm_set_segment(struct kvm_vcpu *vcpu,
2654 struct kvm_segment *var, int seg)
2655 {
2656 struct vcpu_svm *svm = to_svm(vcpu);
2657 struct vmcb_seg *s = svm_seg(vcpu, seg);
2658
2659 s->base = var->base;
2660 s->limit = var->limit;
2661 s->selector = var->selector;
2662 s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
2663 s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
2664 s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
2665 s->attrib |= ((var->present & 1) && !var->unusable) << SVM_SELECTOR_P_SHIFT;
2666 s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
2667 s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
2668 s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
2669 s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
2670 /*
2671 * Deriving the CPL from SS.DPL is accurate except when SYSRET
2672 * returned to a segment with SS.DPL != 3; fixing that corner case
2673 * would require passing the CPL to userspace and back, so it is
2674 * left as is.
2675 */
2676
2677 if (seg == VCPU_SREG_SS)
2678
2679 svm->vmcb->save.cpl = (var->dpl & 3);
2680
2681 mark_dirty(svm->vmcb, VMCB_SEG);
2682 }
2683
2684 static void update_bp_intercept(struct kvm_vcpu *vcpu)
2685 {
2686 struct vcpu_svm *svm = to_svm(vcpu);
2687
2688 clr_exception_intercept(svm, BP_VECTOR);
2689
2690 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
2691 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
2692 set_exception_intercept(svm, BP_VECTOR);
2693 } else
2694 vcpu->guest_debug = 0;
2695 }
2696
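/*
* Hand the vCPU a fresh ASID from the per-CPU pool; when the pool is
* exhausted, bump the generation, restart from min_asid and request a
* flush of all ASIDs on the next VMRUN.
*/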
2697 static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
2698 {
2699 if (sd->next_asid > sd->max_asid) {
2700 ++sd->asid_generation;
2701 sd->next_asid = sd->min_asid;
2702 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
2703 }
2704
2705 svm->asid_generation = sd->asid_generation;
2706 svm->vmcb->control.asid = sd->next_asid++;
2707
2708 mark_dirty(svm->vmcb, VMCB_ASID);
2709 }
2710
2711 static u64 svm_get_dr6(struct kvm_vcpu *vcpu)
2712 {
2713 return to_svm(vcpu)->vmcb->save.dr6;
2714 }
2715
2716 static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value)
2717 {
2718 struct vcpu_svm *svm = to_svm(vcpu);
2719
2720 svm->vmcb->save.dr6 = value;
2721 mark_dirty(svm->vmcb, VMCB_DR);
2722 }
2723
2724 static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
2725 {
2726 struct vcpu_svm *svm = to_svm(vcpu);
2727
2728 get_debugreg(vcpu->arch.db[0], 0);
2729 get_debugreg(vcpu->arch.db[1], 1);
2730 get_debugreg(vcpu->arch.db[2], 2);
2731 get_debugreg(vcpu->arch.db[3], 3);
2732 vcpu->arch.dr6 = svm_get_dr6(vcpu);
2733 vcpu->arch.dr7 = svm->vmcb->save.dr7;
2734
2735 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
2736 set_dr_intercepts(svm);
2737 }
2738
2739 static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
2740 {
2741 struct vcpu_svm *svm = to_svm(vcpu);
2742
2743 svm->vmcb->save.dr7 = value;
2744 mark_dirty(svm->vmcb, VMCB_DR);
2745 }
2746
2747 static int pf_interception(struct vcpu_svm *svm)
2748 {
2749 u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
2750 u64 error_code = svm->vmcb->control.exit_info_1;
2751
2752 return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address,
2753 static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
2754 svm->vmcb->control.insn_bytes : NULL,
2755 svm->vmcb->control.insn_len);
2756 }
2757
2758 static int npf_interception(struct vcpu_svm *svm)
2759 {
2760 u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
2761 u64 error_code = svm->vmcb->control.exit_info_1;
2762
2763 trace_kvm_page_fault(fault_address, error_code);
2764 return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
2765 static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
2766 svm->vmcb->control.insn_bytes : NULL,
2767 svm->vmcb->control.insn_len);
2768 }
2769
2770 static int db_interception(struct vcpu_svm *svm)
2771 {
2772 struct kvm_run *kvm_run = svm->vcpu.run;
2773 struct kvm_vcpu *vcpu = &svm->vcpu;
2774
2775 if (!(svm->vcpu.guest_debug &
2776 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
2777 !svm->nmi_singlestep) {
2778 kvm_queue_exception(&svm->vcpu, DB_VECTOR);
2779 return 1;
2780 }
2781
2782 if (svm->nmi_singlestep) {
2783 disable_nmi_singlestep(svm);
2784
2785 kvm_make_request(KVM_REQ_EVENT, vcpu);
2786 }
2787
2788 if (svm->vcpu.guest_debug &
2789 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
2790 kvm_run->exit_reason = KVM_EXIT_DEBUG;
2791 kvm_run->debug.arch.pc =
2792 svm->vmcb->save.cs.base + svm->vmcb->save.rip;
2793 kvm_run->debug.arch.exception = DB_VECTOR;
2794 return 0;
2795 }
2796
2797 return 1;
2798 }
2799
2800 static int bp_interception(struct vcpu_svm *svm)
2801 {
2802 struct kvm_run *kvm_run = svm->vcpu.run;
2803
2804 kvm_run->exit_reason = KVM_EXIT_DEBUG;
2805 kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
2806 kvm_run->debug.arch.exception = BP_VECTOR;
2807 return 0;
2808 }
2809
2810 static int ud_interception(struct vcpu_svm *svm)
2811 {
2812 return handle_ud(&svm->vcpu);
2813 }
2814
2815 static int ac_interception(struct vcpu_svm *svm)
2816 {
2817 kvm_queue_exception_e(&svm->vcpu, AC_VECTOR, 0);
2818 return 1;
2819 }
2820
2821 static int gp_interception(struct vcpu_svm *svm)
2822 {
2823 struct kvm_vcpu *vcpu = &svm->vcpu;
2824 u32 error_code = svm->vmcb->control.exit_info_1;
2825
2826 WARN_ON_ONCE(!enable_vmware_backdoor);
2827 /*
2828 * The VMware backdoor instructions handled here never generate a
2829 * non-zero error code, so a #GP with an error code is not one of
2830 * them: reinject it into the guest instead of emulating.
2831 */
2832 if (error_code) {
2833 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
2834 return 1;
2835 }
2836 return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
2837 }
2838
2839 static bool is_erratum_383(void)
2840 {
2841 int err, i;
2842 u64 value;
2843
2844 if (!erratum_383_found)
2845 return false;
2846
2847 value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err);
2848 if (err)
2849 return false;
2850
2851
2852 value &= ~(1ULL << 62);
2853
2854 if (value != 0xb600000000010015ULL)
2855 return false;
2856
2857 /* Clear the MCi_STATUS registers. */
2858 for (i = 0; i < 6; ++i)
2859 native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);
2860
2861 value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
2862 if (!err) {
2863 u32 low, high;
2864
2865 value &= ~(1ULL << 2);
2866 low = lower_32_bits(value);
2867 high = upper_32_bits(value);
2868
2869 native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
2870 }
2871
2872
2873 __flush_tlb_all();
2874
2875 return true;
2876 }
2877
2878 static void svm_handle_mce(struct vcpu_svm *svm)
2879 {
2880 if (is_erratum_383()) {
2881 /*
2882 * Erratum 383 triggered: guest state is corrupt, so kill the guest
2883 * with a triple fault.
2884 */
2885 pr_err("KVM: Guest triggered AMD Erratum 383\n");
2886
2887 kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu);
2888
2889 return;
2890 }
2891
2892 /*
2893 * On an #MC intercept the host's MCE handler is not invoked
2894 * automatically, so raise the machine-check exception by hand.
2895 */
2896 asm volatile (
2897 "int $0x12\n");
2898
2899
2900 return;
2901 }
2902
2903 static int mc_interception(struct vcpu_svm *svm)
2904 {
2905 return 1;
2906 }
2907
2908 static int shutdown_interception(struct vcpu_svm *svm)
2909 {
2910 struct kvm_run *kvm_run = svm->vcpu.run;
2911
2912 /*
2913 * The VMCB is undefined after a SHUTDOWN intercept, so reinitialize
2914 * it before reporting the shutdown to userspace.
2915 */
2916 clear_page(svm->vmcb);
2917 init_vmcb(svm);
2918
2919 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
2920 return 0;
2921 }
2922
2923 static int io_interception(struct vcpu_svm *svm)
2924 {
2925 struct kvm_vcpu *vcpu = &svm->vcpu;
2926 u32 io_info = svm->vmcb->control.exit_info_1;
2927 int size, in, string;
2928 unsigned port;
2929
2930 ++svm->vcpu.stat.io_exits;
2931 string = (io_info & SVM_IOIO_STR_MASK) != 0;
2932 in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
2933 if (string)
2934 return kvm_emulate_instruction(vcpu, 0);
2935
2936 port = io_info >> 16;
2937 size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
2938 svm->next_rip = svm->vmcb->control.exit_info_2;
2939
2940 return kvm_fast_pio(&svm->vcpu, size, port, in);
2941 }
2942
2943 static int nmi_interception(struct vcpu_svm *svm)
2944 {
2945 return 1;
2946 }
2947
2948 static int intr_interception(struct vcpu_svm *svm)
2949 {
2950 ++svm->vcpu.stat.irq_exits;
2951 return 1;
2952 }
2953
2954 static int nop_on_interception(struct vcpu_svm *svm)
2955 {
2956 return 1;
2957 }
2958
2959 static int halt_interception(struct vcpu_svm *svm)
2960 {
2961 return kvm_emulate_halt(&svm->vcpu);
2962 }
2963
2964 static int vmmcall_interception(struct vcpu_svm *svm)
2965 {
2966 return kvm_emulate_hypercall(&svm->vcpu);
2967 }
2968
2969 static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
2970 {
2971 struct vcpu_svm *svm = to_svm(vcpu);
2972
2973 return svm->nested.nested_cr3;
2974 }
2975
2976 static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
2977 {
2978 struct vcpu_svm *svm = to_svm(vcpu);
2979 u64 cr3 = svm->nested.nested_cr3;
2980 u64 pdpte;
2981 int ret;
2982
2983 ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(__sme_clr(cr3)), &pdpte,
2984 offset_in_page(cr3) + index * 8, 8);
2985 if (ret)
2986 return 0;
2987 return pdpte;
2988 }
2989
2990 static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
2991 unsigned long root)
2992 {
2993 struct vcpu_svm *svm = to_svm(vcpu);
2994
2995 svm->vmcb->control.nested_cr3 = __sme_set(root);
2996 mark_dirty(svm->vmcb, VMCB_NPT);
2997 }
2998
2999 static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
3000 struct x86_exception *fault)
3001 {
3002 struct vcpu_svm *svm = to_svm(vcpu);
3003
3004 if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) {
3005 /*
3006 * TODO: track the cause of the nested page fault and fill in the
3007 * high bits of exit_info_1 accordingly.
3008 */
3009 svm->vmcb->control.exit_code = SVM_EXIT_NPF;
3010 svm->vmcb->control.exit_code_hi = 0;
3011 svm->vmcb->control.exit_info_1 = (1ULL << 32);
3012 svm->vmcb->control.exit_info_2 = fault->address;
3013 }
3014
3015 svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
3016 svm->vmcb->control.exit_info_1 |= fault->error_code;
3017
3018 /*
3019 * The present bit is always zero for page-structure faults on real
3020 * hardware, so clear it when the walk itself faulted.
3021 */
3022 if (svm->vmcb->control.exit_info_1 & (2ULL << 32))
3023 svm->vmcb->control.exit_info_1 &= ~1;
3024
3025 nested_svm_vmexit(svm);
3026 }
3027
3028 static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
3029 {
3030 WARN_ON(mmu_is_nested(vcpu));
3031
3032 vcpu->arch.mmu = &vcpu->arch.guest_mmu;
3033 kvm_init_shadow_mmu(vcpu);
3034 vcpu->arch.mmu->set_cr3 = nested_svm_set_tdp_cr3;
3035 vcpu->arch.mmu->get_cr3 = nested_svm_get_tdp_cr3;
3036 vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr;
3037 vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
3038 vcpu->arch.mmu->shadow_root_level = get_npt_level(vcpu);
3039 reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu);
3040 vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
3041 }
3042
3043 static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
3044 {
3045 vcpu->arch.mmu = &vcpu->arch.root_mmu;
3046 vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
3047 }
3048
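/*
* SVM instructions are only legal with EFER.SVME set, paging enabled and
* CPL 0; inject #UD or #GP into the guest otherwise.
*/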
3049 static int nested_svm_check_permissions(struct vcpu_svm *svm)
3050 {
3051 if (!(svm->vcpu.arch.efer & EFER_SVME) ||
3052 !is_paging(&svm->vcpu)) {
3053 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
3054 return 1;
3055 }
3056
3057 if (svm->vmcb->save.cpl) {
3058 kvm_inject_gp(&svm->vcpu, 0);
3059 return 1;
3060 }
3061
3062 return 0;
3063 }
3064
3065 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
3066 bool has_error_code, u32 error_code)
3067 {
3068 int vmexit;
3069
3070 if (!is_guest_mode(&svm->vcpu))
3071 return 0;
3072
3073 vmexit = nested_svm_intercept(svm);
3074 if (vmexit != NESTED_EXIT_DONE)
3075 return 0;
3076
3077 svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
3078 svm->vmcb->control.exit_code_hi = 0;
3079 svm->vmcb->control.exit_info_1 = error_code;
3080
3081 /*
3082 * exit_info_2 carries the fault address reported to L1: the async-PF
3083 * token, the exception payload, or CR2.
3084 */
3085 if (svm->vcpu.arch.exception.nested_apf)
3086 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
3087 else if (svm->vcpu.arch.exception.has_payload)
3088 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
3089 else
3090 svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
3091
3092 svm->nested.exit_required = true;
3093 return vmexit;
3094 }
3095
3096
3097 static inline bool nested_svm_intr(struct vcpu_svm *svm)
3098 {
3099 if (!is_guest_mode(&svm->vcpu))
3100 return true;
3101
3102 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
3103 return true;
3104
3105 if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
3106 return false;
3107
3108 /*
3109 * If a nested #VMEXIT is already pending, do not overwrite its exit
3110 * code with SVM_EXIT_INTR; let the pending exit be delivered first.
3111 */
3112
3113 if (svm->nested.exit_required)
3114 return false;
3115
3116 svm->vmcb->control.exit_code = SVM_EXIT_INTR;
3117 svm->vmcb->control.exit_info_1 = 0;
3118 svm->vmcb->control.exit_info_2 = 0;
3119
3120 if (svm->nested.intercept & 1ULL) {
3121 /*
3122 * The #VMEXIT cannot be emulated here directly because this path
3123 * runs with IRQs and preemption disabled and the emulation might
3124 * sleep, so only flag that a #VMEXIT is required.
3125 */
3126
3127 svm->nested.exit_required = true;
3128 trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
3129 return false;
3130 }
3131
3132 return true;
3133 }
3134
3135
3136 static inline bool nested_svm_nmi(struct vcpu_svm *svm)
3137 {
3138 if (!is_guest_mode(&svm->vcpu))
3139 return true;
3140
3141 if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
3142 return true;
3143
3144 svm->vmcb->control.exit_code = SVM_EXIT_NMI;
3145 svm->nested.exit_required = true;
3146
3147 return false;
3148 }
3149
3150 static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
3151 {
3152 unsigned port, size, iopm_len;
3153 u16 val, mask;
3154 u8 start_bit;
3155 u64 gpa;
3156
3157 if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
3158 return NESTED_EXIT_HOST;
3159
3160 port = svm->vmcb->control.exit_info_1 >> 16;
3161 size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
3162 SVM_IOIO_SIZE_SHIFT;
3163 gpa = svm->nested.vmcb_iopm + (port / 8);
3164 start_bit = port % 8;
3165 iopm_len = (start_bit + size > 8) ? 2 : 1;
3166 mask = (0xf >> (4 - size)) << start_bit;
3167 val = 0;
3168
3169 if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
3170 return NESTED_EXIT_DONE;
3171
3172 return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
3173 }
3174
3175 static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
3176 {
3177 u32 offset, msr, value;
3178 int write, mask;
3179
3180 if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
3181 return NESTED_EXIT_HOST;
3182
3183 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
3184 offset = svm_msrpm_offset(msr);
3185 write = svm->vmcb->control.exit_info_1 & 1;
3186 mask = 1 << ((2 * (msr & 0xf)) + write);
3187
3188 if (offset == MSR_INVALID)
3189 return NESTED_EXIT_DONE;
3190
3191
3192 offset *= 4;
3193
3194 if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4))
3195 return NESTED_EXIT_DONE;
3196
3197 return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
3198 }
3199
3200
3201 static int nested_svm_intercept_db(struct vcpu_svm *svm)
3202 {
3203 unsigned long dr6;
3204
3205
3206 if (!svm->nmi_singlestep)
3207 return NESTED_EXIT_DONE;
3208
3209
3210 if (kvm_get_dr(&svm->vcpu, 6, &dr6))
3211 return NESTED_EXIT_DONE;
3212 if (!(dr6 & DR6_BS))
3213 return NESTED_EXIT_DONE;
3214
3215
3216 if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) {
3217 disable_nmi_singlestep(svm);
3218 return NESTED_EXIT_DONE;
3219 }
3220
3221
3222 return NESTED_EXIT_HOST;
3223 }
3224
3225 static int nested_svm_exit_special(struct vcpu_svm *svm)
3226 {
3227 u32 exit_code = svm->vmcb->control.exit_code;
3228
3229 switch (exit_code) {
3230 case SVM_EXIT_INTR:
3231 case SVM_EXIT_NMI:
3232 case SVM_EXIT_EXCP_BASE + MC_VECTOR:
3233 return NESTED_EXIT_HOST;
3234 case SVM_EXIT_NPF:
3235
3236 if (npt_enabled)
3237 return NESTED_EXIT_HOST;
3238 break;
3239 case SVM_EXIT_EXCP_BASE + PF_VECTOR:
3240
3241 if (!npt_enabled || svm->vcpu.arch.apf.host_apf_reason)
3242 return NESTED_EXIT_HOST;
3243 break;
3244 default:
3245 break;
3246 }
3247
3248 return NESTED_EXIT_CONTINUE;
3249 }
3250
3251
3252 /* Decide whether a #VMEXIT is reflected to the nested hypervisor
3253 * (NESTED_EXIT_DONE) or handled by the host (NESTED_EXIT_HOST). */
3254 static int nested_svm_intercept(struct vcpu_svm *svm)
3255 {
3256 u32 exit_code = svm->vmcb->control.exit_code;
3257 int vmexit = NESTED_EXIT_HOST;
3258
3259 switch (exit_code) {
3260 case SVM_EXIT_MSR:
3261 vmexit = nested_svm_exit_handled_msr(svm);
3262 break;
3263 case SVM_EXIT_IOIO:
3264 vmexit = nested_svm_intercept_ioio(svm);
3265 break;
3266 case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
3267 u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
3268 if (svm->nested.intercept_cr & bit)
3269 vmexit = NESTED_EXIT_DONE;
3270 break;
3271 }
3272 case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
3273 u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
3274 if (svm->nested.intercept_dr & bit)
3275 vmexit = NESTED_EXIT_DONE;
3276 break;
3277 }
3278 case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
3279 u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
3280 if (svm->nested.intercept_exceptions & excp_bits) {
3281 if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
3282 vmexit = nested_svm_intercept_db(svm);
3283 else
3284 vmexit = NESTED_EXIT_DONE;
3285 }
3286
3287 else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
3288 svm->vcpu.arch.exception.nested_apf != 0)
3289 vmexit = NESTED_EXIT_DONE;
3290 break;
3291 }
3292 case SVM_EXIT_ERR: {
3293 vmexit = NESTED_EXIT_DONE;
3294 break;
3295 }
3296 default: {
3297 u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
3298 if (svm->nested.intercept & exit_bits)
3299 vmexit = NESTED_EXIT_DONE;
3300 }
3301 }
3302
3303 return vmexit;
3304 }
3305
3306 static int nested_svm_exit_handled(struct vcpu_svm *svm)
3307 {
3308 int vmexit;
3309
3310 vmexit = nested_svm_intercept(svm);
3311
3312 if (vmexit == NESTED_EXIT_DONE)
3313 nested_svm_vmexit(svm);
3314
3315 return vmexit;
3316 }
3317
3318 static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
3319 {
3320 struct vmcb_control_area *dst = &dst_vmcb->control;
3321 struct vmcb_control_area *from = &from_vmcb->control;
3322
3323 dst->intercept_cr = from->intercept_cr;
3324 dst->intercept_dr = from->intercept_dr;
3325 dst->intercept_exceptions = from->intercept_exceptions;
3326 dst->intercept = from->intercept;
3327 dst->iopm_base_pa = from->iopm_base_pa;
3328 dst->msrpm_base_pa = from->msrpm_base_pa;
3329 dst->tsc_offset = from->tsc_offset;
3330 /* The ASID is not copied; it is managed separately for svm->vmcb. */
3331 dst->tlb_ctl = from->tlb_ctl;
3332 dst->int_ctl = from->int_ctl;
3333 dst->int_vector = from->int_vector;
3334 dst->int_state = from->int_state;
3335 dst->exit_code = from->exit_code;
3336 dst->exit_code_hi = from->exit_code_hi;
3337 dst->exit_info_1 = from->exit_info_1;
3338 dst->exit_info_2 = from->exit_info_2;
3339 dst->exit_int_info = from->exit_int_info;
3340 dst->exit_int_info_err = from->exit_int_info_err;
3341 dst->nested_ctl = from->nested_ctl;
3342 dst->event_inj = from->event_inj;
3343 dst->event_inj_err = from->event_inj_err;
3344 dst->nested_cr3 = from->nested_cr3;
3345 dst->virt_ext = from->virt_ext;
3346 dst->pause_filter_count = from->pause_filter_count;
3347 dst->pause_filter_thresh = from->pause_filter_thresh;
3348 }
3349
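/*
* Emulate #VMEXIT from L2 to L1: copy the current guest state and exit
* information into the nested VMCB, restore the host state saved in
* hsave, and leave guest mode.
*/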
3350 static int nested_svm_vmexit(struct vcpu_svm *svm)
3351 {
3352 int rc;
3353 struct vmcb *nested_vmcb;
3354 struct vmcb *hsave = svm->nested.hsave;
3355 struct vmcb *vmcb = svm->vmcb;
3356 struct kvm_host_map map;
3357
3358 trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
3359 vmcb->control.exit_info_1,
3360 vmcb->control.exit_info_2,
3361 vmcb->control.exit_int_info,
3362 vmcb->control.exit_int_info_err,
3363 KVM_ISA_SVM);
3364
3365 rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb), &map);
3366 if (rc) {
3367 if (rc == -EINVAL)
3368 kvm_inject_gp(&svm->vcpu, 0);
3369 return 1;
3370 }
3371
3372 nested_vmcb = map.hva;
3373
3374
3375 leave_guest_mode(&svm->vcpu);
3376 svm->nested.vmcb = 0;
3377
3378
3379 disable_gif(svm);
3380
3381 nested_vmcb->save.es = vmcb->save.es;
3382 nested_vmcb->save.cs = vmcb->save.cs;
3383 nested_vmcb->save.ss = vmcb->save.ss;
3384 nested_vmcb->save.ds = vmcb->save.ds;
3385 nested_vmcb->save.gdtr = vmcb->save.gdtr;
3386 nested_vmcb->save.idtr = vmcb->save.idtr;
3387 nested_vmcb->save.efer = svm->vcpu.arch.efer;
3388 nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu);
3389 nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu);
3390 nested_vmcb->save.cr2 = vmcb->save.cr2;
3391 nested_vmcb->save.cr4 = svm->vcpu.arch.cr4;
3392 nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
3393 nested_vmcb->save.rip = vmcb->save.rip;
3394 nested_vmcb->save.rsp = vmcb->save.rsp;
3395 nested_vmcb->save.rax = vmcb->save.rax;
3396 nested_vmcb->save.dr7 = vmcb->save.dr7;
3397 nested_vmcb->save.dr6 = vmcb->save.dr6;
3398 nested_vmcb->save.cpl = vmcb->save.cpl;
3399
3400 nested_vmcb->control.int_ctl = vmcb->control.int_ctl;
3401 nested_vmcb->control.int_vector = vmcb->control.int_vector;
3402 nested_vmcb->control.int_state = vmcb->control.int_state;
3403 nested_vmcb->control.exit_code = vmcb->control.exit_code;
3404 nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi;
3405 nested_vmcb->control.exit_info_1 = vmcb->control.exit_info_1;
3406 nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
3407 nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
3408 nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
3409
3410 if (svm->nrips_enabled)
3411 nested_vmcb->control.next_rip = vmcb->control.next_rip;
3412
3413 /*
3414 * If a VMRUN/#VMEXIT is emulated within the same host #vmexit cycle,
3415 * injected events must not be lost: copy a still-valid event_inj
3416 * into exit_int_info. Both cannot be valid at once, because this
3417 * path is only reached on a VMRUN intercept, which never carries a
3418 * valid exit_int_info of its own.
3419 */
3420
3421 if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
3422 struct vmcb_control_area *nc = &nested_vmcb->control;
3423
3424 nc->exit_int_info = vmcb->control.event_inj;
3425 nc->exit_int_info_err = vmcb->control.event_inj_err;
3426 }
3427
3428 nested_vmcb->control.tlb_ctl = 0;
3429 nested_vmcb->control.event_inj = 0;
3430 nested_vmcb->control.event_inj_err = 0;
3431
3432 nested_vmcb->control.pause_filter_count =
3433 svm->vmcb->control.pause_filter_count;
3434 nested_vmcb->control.pause_filter_thresh =
3435 svm->vmcb->control.pause_filter_thresh;
3436
3437
3438 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
3439 nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
3440
3441 /* Restore the original (host) control area. */
3442 copy_vmcb_control_area(vmcb, hsave);
3443
3444 svm->vcpu.arch.tsc_offset = svm->vmcb->control.tsc_offset;
3445 kvm_clear_exception_queue(&svm->vcpu);
3446 kvm_clear_interrupt_queue(&svm->vcpu);
3447
3448 svm->nested.nested_cr3 = 0;
3449
3450 /* Restore selected save-area fields from the host save area. */
3451 svm->vmcb->save.es = hsave->save.es;
3452 svm->vmcb->save.cs = hsave->save.cs;
3453 svm->vmcb->save.ss = hsave->save.ss;
3454 svm->vmcb->save.ds = hsave->save.ds;
3455 svm->vmcb->save.gdtr = hsave->save.gdtr;
3456 svm->vmcb->save.idtr = hsave->save.idtr;
3457 kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
3458 svm_set_efer(&svm->vcpu, hsave->save.efer);
3459 svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
3460 svm_set_cr4(&svm->vcpu, hsave->save.cr4);
3461 if (npt_enabled) {
3462 svm->vmcb->save.cr3 = hsave->save.cr3;
3463 svm->vcpu.arch.cr3 = hsave->save.cr3;
3464 } else {
3465 (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
3466 }
3467 kvm_rax_write(&svm->vcpu, hsave->save.rax);
3468 kvm_rsp_write(&svm->vcpu, hsave->save.rsp);
3469 kvm_rip_write(&svm->vcpu, hsave->save.rip);
3470 svm->vmcb->save.dr7 = 0;
3471 svm->vmcb->save.cpl = 0;
3472 svm->vmcb->control.exit_int_info = 0;
3473
3474 mark_all_dirty(svm->vmcb);
3475
3476 kvm_vcpu_unmap(&svm->vcpu, &map, true);
3477
3478 nested_svm_uninit_mmu_context(&svm->vcpu);
3479 kvm_mmu_reset_context(&svm->vcpu);
3480 kvm_mmu_load(&svm->vcpu);
3481
3482 /*
3483 * Drop any event picked up for L2 via svm_complete_interrupts() so
3484 * that it does not end up being injected into L1.
3485 */
3486 svm->vcpu.arch.nmi_injected = false;
3487 kvm_clear_exception_queue(&svm->vcpu);
3488 kvm_clear_interrupt_queue(&svm->vcpu);
3489
3490 return 0;
3491 }
3492
3493 static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
3494 {
3495 /*
3496 * Merge the MSR permission bitmaps of KVM and the nested VMCB. Only
3497 * the offsets listed in msrpm_offsets[], where the KVM bitmap may
3498 * contain zero bits, need to be merged.
3499 */
3500 int i;
3501
3502 if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
3503 return true;
3504
3505 for (i = 0; i < MSRPM_OFFSETS; i++) {
3506 u32 value, p;
3507 u64 offset;
3508
3509 if (msrpm_offsets[i] == 0xffffffff)
3510 break;
3511
3512 p = msrpm_offsets[i];
3513 offset = svm->nested.vmcb_msrpm + (p * 4);
3514
3515 if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
3516 return false;
3517
3518 svm->nested.msrpm[p] = svm->msrpm[p] | value;
3519 }
3520
3521 svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm));
3522
3523 return true;
3524 }
3525
3526 static bool nested_vmcb_checks(struct vmcb *vmcb)
3527 {
3528 if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
3529 return false;
3530
3531 if (vmcb->control.asid == 0)
3532 return false;
3533
3534 if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
3535 !npt_enabled)
3536 return false;
3537
3538 return true;
3539 }
3540
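/*
* Switch the vCPU from L1 to L2: load the nested guest state and control
* fields from the nested VMCB, cache L1's intercepts and merge them with
* the host intercepts via recalc_intercepts().
*/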
3541 static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
3542 struct vmcb *nested_vmcb, struct kvm_host_map *map)
3543 {
3544 if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
3545 svm->vcpu.arch.hflags |= HF_HIF_MASK;
3546 else
3547 svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
3548
3549 if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) {
3550 svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
3551 nested_svm_init_mmu_context(&svm->vcpu);
3552 }
3553
3554 /* Load the nested guest state from the nested VMCB. */
3555 svm->vmcb->save.es = nested_vmcb->save.es;
3556 svm->vmcb->save.cs = nested_vmcb->save.cs;
3557 svm->vmcb->save.ss = nested_vmcb->save.ss;
3558 svm->vmcb->save.ds = nested_vmcb->save.ds;
3559 svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
3560 svm->vmcb->save.idtr = nested_vmcb->save.idtr;
3561 kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
3562 svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
3563 svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
3564 svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
3565 if (npt_enabled) {
3566 svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
3567 svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
3568 } else
3569 (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
3570
3571
3572 kvm_mmu_reset_context(&svm->vcpu);
3573
3574 svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
3575 kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax);
3576 kvm_rsp_write(&svm->vcpu, nested_vmcb->save.rsp);
3577 kvm_rip_write(&svm->vcpu, nested_vmcb->save.rip);
3578
3579
3580 svm->vmcb->save.rax = nested_vmcb->save.rax;
3581 svm->vmcb->save.rsp = nested_vmcb->save.rsp;
3582 svm->vmcb->save.rip = nested_vmcb->save.rip;
3583 svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
3584 svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
3585 svm->vmcb->save.cpl = nested_vmcb->save.cpl;
3586
3587 svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
3588 svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL;
3589
3590 /* Cache the intercept bitmaps of the nested VMCB. */
3591 svm->nested.intercept_cr = nested_vmcb->control.intercept_cr;
3592 svm->nested.intercept_dr = nested_vmcb->control.intercept_dr;
3593 svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
3594 svm->nested.intercept = nested_vmcb->control.intercept;
3595
3596 svm_flush_tlb(&svm->vcpu, true);
3597 svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
3598 if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
3599 svm->vcpu.arch.hflags |= HF_VINTR_MASK;
3600 else
3601 svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
3602
3603 if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
3604
3605 clr_cr_intercept(svm, INTERCEPT_CR8_READ);
3606 clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
3607 }
3608
3609
3610 clr_intercept(svm, INTERCEPT_VMMCALL);
3611
3612 svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
3613 svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;
3614
3615 svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
3616 svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
3617 svm->vmcb->control.int_state = nested_vmcb->control.int_state;
3618 svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
3619 svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
3620
3621 svm->vmcb->control.pause_filter_count =
3622 nested_vmcb->control.pause_filter_count;
3623 svm->vmcb->control.pause_filter_thresh =
3624 nested_vmcb->control.pause_filter_thresh;
3625
3626 kvm_vcpu_unmap(&svm->vcpu, map, true);
3627
3628
3629 enter_guest_mode(&svm->vcpu);
3630
3631 /*
3632 * Merge the guest and host intercepts; this must be done with the
3633 * vCPU already in guest mode to take effect.
3634 */
3635 recalc_intercepts(svm);
3636
3637 svm->nested.vmcb = vmcb_gpa;
3638
3639 enable_gif(svm);
3640
3641 mark_all_dirty(svm->vmcb);
3642 }
3643
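/*
* Emulate VMRUN: map the nested VMCB addressed by RAX, validate it, save
* the current host state into hsave and enter guest mode. A malformed
* VMCB is reported back to L1 via SVM_EXIT_ERR.
*/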
3644 static int nested_svm_vmrun(struct vcpu_svm *svm)
3645 {
3646 int ret;
3647 struct vmcb *nested_vmcb;
3648 struct vmcb *hsave = svm->nested.hsave;
3649 struct vmcb *vmcb = svm->vmcb;
3650 struct kvm_host_map map;
3651 u64 vmcb_gpa;
3652
3653 vmcb_gpa = svm->vmcb->save.rax;
3654
3655 ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
3656 if (ret == -EINVAL) {
3657 kvm_inject_gp(&svm->vcpu, 0);
3658 return 1;
3659 } else if (ret) {
3660 return kvm_skip_emulated_instruction(&svm->vcpu);
3661 }
3662
3663 ret = kvm_skip_emulated_instruction(&svm->vcpu);
3664
3665 nested_vmcb = map.hva;
3666
3667 if (!nested_vmcb_checks(nested_vmcb)) {
3668 nested_vmcb->control.exit_code = SVM_EXIT_ERR;
3669 nested_vmcb->control.exit_code_hi = 0;
3670 nested_vmcb->control.exit_info_1 = 0;
3671 nested_vmcb->control.exit_info_2 = 0;
3672
3673 kvm_vcpu_unmap(&svm->vcpu, &map, true);
3674
3675 return ret;
3676 }
3677
3678 trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
3679 nested_vmcb->save.rip,
3680 nested_vmcb->control.int_ctl,
3681 nested_vmcb->control.event_inj,
3682 nested_vmcb->control.nested_ctl);
3683
3684 trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff,
3685 nested_vmcb->control.intercept_cr >> 16,
3686 nested_vmcb->control.intercept_exceptions,
3687 nested_vmcb->control.intercept);
3688
3689
3690 kvm_clear_exception_queue(&svm->vcpu);
3691 kvm_clear_interrupt_queue(&svm->vcpu);
3692
3693 /*
3694 * Save the current host state into the hsave area so that everything
3695 * can be restored on #VMEXIT.
3696 */
3697 hsave->save.es = vmcb->save.es;
3698 hsave->save.cs = vmcb->save.cs;
3699 hsave->save.ss = vmcb->save.ss;
3700 hsave->save.ds = vmcb->save.ds;
3701 hsave->save.gdtr = vmcb->save.gdtr;
3702 hsave->save.idtr = vmcb->save.idtr;
3703 hsave->save.efer = svm->vcpu.arch.efer;
3704 hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
3705 hsave->save.cr4 = svm->vcpu.arch.cr4;
3706 hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
3707 hsave->save.rip = kvm_rip_read(&svm->vcpu);
3708 hsave->save.rsp = vmcb->save.rsp;
3709 hsave->save.rax = vmcb->save.rax;
3710 if (npt_enabled)
3711 hsave->save.cr3 = vmcb->save.cr3;
3712 else
3713 hsave->save.cr3 = kvm_read_cr3(&svm->vcpu);
3714
3715 copy_vmcb_control_area(hsave, vmcb);
3716
3717 enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map);
3718
3719 if (!nested_svm_vmrun_msrpm(svm)) {
3720 svm->vmcb->control.exit_code = SVM_EXIT_ERR;
3721 svm->vmcb->control.exit_code_hi = 0;
3722 svm->vmcb->control.exit_info_1 = 0;
3723 svm->vmcb->control.exit_info_2 = 0;
3724
3725 nested_svm_vmexit(svm);
3726 }
3727
3728 return ret;
3729 }
3730
3731 static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
3732 {
3733 to_vmcb->save.fs = from_vmcb->save.fs;
3734 to_vmcb->save.gs = from_vmcb->save.gs;
3735 to_vmcb->save.tr = from_vmcb->save.tr;
3736 to_vmcb->save.ldtr = from_vmcb->save.ldtr;
3737 to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
3738 to_vmcb->save.star = from_vmcb->save.star;
3739 to_vmcb->save.lstar = from_vmcb->save.lstar;
3740 to_vmcb->save.cstar = from_vmcb->save.cstar;
3741 to_vmcb->save.sfmask = from_vmcb->save.sfmask;
3742 to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
3743 to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
3744 to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
3745 }
3746
3747 static int vmload_interception(struct vcpu_svm *svm)
3748 {
3749 struct vmcb *nested_vmcb;
3750 struct kvm_host_map map;
3751 int ret;
3752
3753 if (nested_svm_check_permissions(svm))
3754 return 1;
3755
3756 ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->vmcb->save.rax), &map);
3757 if (ret) {
3758 if (ret == -EINVAL)
3759 kvm_inject_gp(&svm->vcpu, 0);
3760 return 1;
3761 }
3762
3763 nested_vmcb = map.hva;
3764
3765 ret = kvm_skip_emulated_instruction(&svm->vcpu);
3766
3767 nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
3768 kvm_vcpu_unmap(&svm->vcpu, &map, true);
3769
3770 return ret;
3771 }
3772
3773 static int vmsave_interception(struct vcpu_svm *svm)
3774 {
3775 struct vmcb *nested_vmcb;
3776 struct kvm_host_map map;
3777 int ret;
3778
3779 if (nested_svm_check_permissions(svm))
3780 return 1;
3781
3782 ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->vmcb->save.rax), &map);
3783 if (ret) {
3784 if (ret == -EINVAL)
3785 kvm_inject_gp(&svm->vcpu, 0);
3786 return 1;
3787 }
3788
3789 nested_vmcb = map.hva;
3790
3791 ret = kvm_skip_emulated_instruction(&svm->vcpu);
3792
3793 nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
3794 kvm_vcpu_unmap(&svm->vcpu, &map, true);
3795
3796 return ret;
3797 }
3798
3799 static int vmrun_interception(struct vcpu_svm *svm)
3800 {
3801 if (nested_svm_check_permissions(svm))
3802 return 1;
3803
3804 return nested_svm_vmrun(svm);
3805 }
3806
3807 static int stgi_interception(struct vcpu_svm *svm)
3808 {
3809 int ret;
3810
3811 if (nested_svm_check_permissions(svm))
3812 return 1;
3813
3814 /*
3815 * If VGIF is enabled, the STGI intercept is only set to detect the
3816 * opening of the SMI/NMI window; remove it now.
3817 */
3818 if (vgif_enabled(svm))
3819 clr_intercept(svm, INTERCEPT_STGI);
3820
3821 ret = kvm_skip_emulated_instruction(&svm->vcpu);
3822 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3823
3824 enable_gif(svm);
3825
3826 return ret;
3827 }
3828
3829 static int clgi_interception(struct vcpu_svm *svm)
3830 {
3831 int ret;
3832
3833 if (nested_svm_check_permissions(svm))
3834 return 1;
3835
3836 ret = kvm_skip_emulated_instruction(&svm->vcpu);
3837
3838 disable_gif(svm);
3839
3840
3841 if (!kvm_vcpu_apicv_active(&svm->vcpu)) {
3842 svm_clear_vintr(svm);
3843 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
3844 mark_dirty(svm->vmcb, VMCB_INTR);
3845 }
3846
3847 return ret;
3848 }
3849
3850 static int invlpga_interception(struct vcpu_svm *svm)
3851 {
3852 struct kvm_vcpu *vcpu = &svm->vcpu;
3853
3854 trace_kvm_invlpga(svm->vmcb->save.rip, kvm_rcx_read(&svm->vcpu),
3855 kvm_rax_read(&svm->vcpu));
3856
3857
3858 kvm_mmu_invlpg(vcpu, kvm_rax_read(&svm->vcpu));
3859
3860 return kvm_skip_emulated_instruction(&svm->vcpu);
3861 }
3862
3863 static int skinit_interception(struct vcpu_svm *svm)
3864 {
3865 trace_kvm_skinit(svm->vmcb->save.rip, kvm_rax_read(&svm->vcpu));
3866
3867 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
3868 return 1;
3869 }
3870
3871 static int wbinvd_interception(struct vcpu_svm *svm)
3872 {
3873 return kvm_emulate_wbinvd(&svm->vcpu);
3874 }
3875
3876 static int xsetbv_interception(struct vcpu_svm *svm)
3877 {
3878 u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
3879 u32 index = kvm_rcx_read(&svm->vcpu);
3880
3881 if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
3882 return kvm_skip_emulated_instruction(&svm->vcpu);
3883 }
3884
3885 return 1;
3886 }
3887
3888 static int rdpru_interception(struct vcpu_svm *svm)
3889 {
3890 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
3891 return 1;
3892 }
3893
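/*
* Decode the task-switch exit information (TSS selector, reason, and any
* pending error code or injected event) and hand the switch over to the
* common kvm_task_switch() emulation.
*/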
3894 static int task_switch_interception(struct vcpu_svm *svm)
3895 {
3896 u16 tss_selector;
3897 int reason;
3898 int int_type = svm->vmcb->control.exit_int_info &
3899 SVM_EXITINTINFO_TYPE_MASK;
3900 int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK;
3901 uint32_t type =
3902 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
3903 uint32_t idt_v =
3904 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
3905 bool has_error_code = false;
3906 u32 error_code = 0;
3907
3908 tss_selector = (u16)svm->vmcb->control.exit_info_1;
3909
3910 if (svm->vmcb->control.exit_info_2 &
3911 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
3912 reason = TASK_SWITCH_IRET;
3913 else if (svm->vmcb->control.exit_info_2 &
3914 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
3915 reason = TASK_SWITCH_JMP;
3916 else if (idt_v)
3917 reason = TASK_SWITCH_GATE;
3918 else
3919 reason = TASK_SWITCH_CALL;
3920
3921 if (reason == TASK_SWITCH_GATE) {
3922 switch (type) {
3923 case SVM_EXITINTINFO_TYPE_NMI:
3924 svm->vcpu.arch.nmi_injected = false;
3925 break;
3926 case SVM_EXITINTINFO_TYPE_EXEPT:
3927 if (svm->vmcb->control.exit_info_2 &
3928 (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) {
3929 has_error_code = true;
3930 error_code =
3931 (u32)svm->vmcb->control.exit_info_2;
3932 }
3933 kvm_clear_exception_queue(&svm->vcpu);
3934 break;
3935 case SVM_EXITINTINFO_TYPE_INTR:
3936 kvm_clear_interrupt_queue(&svm->vcpu);
3937 break;
3938 default:
3939 break;
3940 }
3941 }
3942
3943 if (reason != TASK_SWITCH_GATE ||
3944 int_type == SVM_EXITINTINFO_TYPE_SOFT ||
3945 (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
3946 (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) {
3947 if (!skip_emulated_instruction(&svm->vcpu))
3948 return 0;
3949 }
3950
3951 if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
3952 int_vec = -1;
3953
3954 return kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
3955 has_error_code, error_code);
3956 }
3957
3958 static int cpuid_interception(struct vcpu_svm *svm)
3959 {
3960 return kvm_emulate_cpuid(&svm->vcpu);
3961 }
3962
3963 static int iret_interception(struct vcpu_svm *svm)
3964 {
3965 ++svm->vcpu.stat.nmi_window_exits;
3966 clr_intercept(svm, INTERCEPT_IRET);
3967 svm->vcpu.arch.hflags |= HF_IRET_MASK;
3968 svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
3969 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3970 return 1;
3971 }
3972
3973 static int invlpg_interception(struct vcpu_svm *svm)
3974 {
3975 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
3976 return kvm_emulate_instruction(&svm->vcpu, 0);
3977
3978 kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
3979 return kvm_skip_emulated_instruction(&svm->vcpu);
3980 }
3981
3982 static int emulate_on_interception(struct vcpu_svm *svm)
3983 {
3984 return kvm_emulate_instruction(&svm->vcpu, 0);
3985 }
3986
3987 static int rsm_interception(struct vcpu_svm *svm)
3988 {
3989 return kvm_emulate_instruction_from_buffer(&svm->vcpu, rsm_ins_bytes, 2);
3990 }
3991
3992 static int rdpmc_interception(struct vcpu_svm *svm)
3993 {
3994 int err;
3995
3996 if (!nrips)
3997 return emulate_on_interception(svm);
3998
3999 err = kvm_rdpmc(&svm->vcpu);
4000 return kvm_complete_insn_gp(&svm->vcpu, err);
4001 }
4002
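/*
* For a nested guest with the selective CR0 intercept enabled, check
* whether a CR0 write changes bits outside SVM_CR0_SELECTIVE_MASK and,
* if so, reflect an SVM_EXIT_CR0_SEL_WRITE #VMEXIT to L1.
*/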
4003 static bool check_selective_cr0_intercepted(struct vcpu_svm *svm,
4004 unsigned long val)
4005 {
4006 unsigned long cr0 = svm->vcpu.arch.cr0;
4007 bool ret = false;
4008 u64 intercept;
4009
4010 intercept = svm->nested.intercept;
4011
4012 if (!is_guest_mode(&svm->vcpu) ||
4013 (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))))
4014 return false;
4015
4016 cr0 &= ~SVM_CR0_SELECTIVE_MASK;
4017 val &= ~SVM_CR0_SELECTIVE_MASK;
4018
4019 if (cr0 ^ val) {
4020 svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
4021 ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
4022 }
4023
4024 return ret;
4025 }
4026
4027 #define CR_VALID (1ULL << 63)
4028
4029 static int cr_interception(struct vcpu_svm *svm)
4030 {
4031 int reg, cr;
4032 unsigned long val;
4033 int err;
4034
4035 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
4036 return emulate_on_interception(svm);
4037
4038 if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0))
4039 return emulate_on_interception(svm);
4040
4041 reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
4042 if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE)
4043 cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0;
4044 else
4045 cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
4046
4047 err = 0;
4048 if (cr >= 16) {
4049 cr -= 16;
4050 val = kvm_register_read(&svm->vcpu, reg);
4051 switch (cr) {
4052 case 0:
4053 if (!check_selective_cr0_intercepted(svm, val))
4054 err = kvm_set_cr0(&svm->vcpu, val);
4055 else
4056 return 1;
4057
4058 break;
4059 case 3:
4060 err = kvm_set_cr3(&svm->vcpu, val);
4061 break;
4062 case 4:
4063 err = kvm_set_cr4(&svm->vcpu, val);
4064 break;
4065 case 8:
4066 err = kvm_set_cr8(&svm->vcpu, val);
4067 break;
4068 default:
4069 WARN(1, "unhandled write to CR%d", cr);
4070 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
4071 return 1;
4072 }
4073 } else {
4074 switch (cr) {
4075 case 0:
4076 val = kvm_read_cr0(&svm->vcpu);
4077 break;
4078 case 2:
4079 val = svm->vcpu.arch.cr2;
4080 break;
4081 case 3:
4082 val = kvm_read_cr3(&svm->vcpu);
4083 break;
4084 case 4:
4085 val = kvm_read_cr4(&svm->vcpu);
4086 break;
4087 case 8:
4088 val = kvm_get_cr8(&svm->vcpu);
4089 break;
4090 default:
4091 WARN(1, "unhandled read from CR%d", cr);
4092 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
4093 return 1;
4094 }
4095 kvm_register_write(&svm->vcpu, reg, val);
4096 }
4097 return kvm_complete_insn_gp(&svm->vcpu, err);
4098 }
4099
4100 static int dr_interception(struct vcpu_svm *svm)
4101 {
4102 int reg, dr;
4103 unsigned long val;
4104
4105 if (svm->vcpu.guest_debug == 0) {
4106
4107
4108
4109
4110
4111 clr_dr_intercepts(svm);
4112 svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
4113 return 1;
4114 }
4115
4116 if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
4117 return emulate_on_interception(svm);
4118
4119 reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
4120 dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
4121
4122 if (dr >= 16) {
4123 if (!kvm_require_dr(&svm->vcpu, dr - 16))
4124 return 1;
4125 val = kvm_register_read(&svm->vcpu, reg);
4126 kvm_set_dr(&svm->vcpu, dr - 16, val);
4127 } else {
4128 if (!kvm_require_dr(&svm->vcpu, dr))
4129 return 1;
4130 kvm_get_dr(&svm->vcpu, dr, &val);
4131 kvm_register_write(&svm->vcpu, reg, val);
4132 }
4133
4134 return kvm_skip_emulated_instruction(&svm->vcpu);
4135 }
4136
4137 static int cr8_write_interception(struct vcpu_svm *svm)
4138 {
4139 struct kvm_run *kvm_run = svm->vcpu.run;
4140 int r;
4141
4142 u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
4143
4144 r = cr_interception(svm);
4145 if (lapic_in_kernel(&svm->vcpu))
4146 return r;
4147 if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
4148 return r;
4149 kvm_run->exit_reason = KVM_EXIT_SET_TPR;
4150 return 0;
4151 }
4152
4153 static int svm_get_msr_feature(struct kvm_msr_entry *msr)
4154 {
4155 msr->data = 0;
4156
4157 switch (msr->index) {
4158 case MSR_F10H_DECFG:
4159 if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
4160 msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE;
4161 break;
4162 default:
4163 return 1;
4164 }
4165
4166 return 0;
4167 }
4168
4169 static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
4170 {
4171 struct vcpu_svm *svm = to_svm(vcpu);
4172
4173 switch (msr_info->index) {
4174 case MSR_STAR:
4175 msr_info->data = svm->vmcb->save.star;
4176 break;
4177 #ifdef CONFIG_X86_64
4178 case MSR_LSTAR:
4179 msr_info->data = svm->vmcb->save.lstar;
4180 break;
4181 case MSR_CSTAR:
4182 msr_info->data = svm->vmcb->save.cstar;
4183 break;
4184 case MSR_KERNEL_GS_BASE:
4185 msr_info->data = svm->vmcb->save.kernel_gs_base;
4186 break;
4187 case MSR_SYSCALL_MASK:
4188 msr_info->data = svm->vmcb->save.sfmask;
4189 break;
4190 #endif
4191 case MSR_IA32_SYSENTER_CS:
4192 msr_info->data = svm->vmcb->save.sysenter_cs;
4193 break;
4194 case MSR_IA32_SYSENTER_EIP:
4195 msr_info->data = svm->sysenter_eip;
4196 break;
4197 case MSR_IA32_SYSENTER_ESP:
4198 msr_info->data = svm->sysenter_esp;
4199 break;
4200 case MSR_TSC_AUX:
4201 if (!boot_cpu_has(X86_FEATURE_RDTSCP))
4202 return 1;
4203 msr_info->data = svm->tsc_aux;
4204 break;
4205
4206 /*
4207 * The following LBR-related VMCB fields can be returned directly on
4208 * rdmsr; they stay 0 until LBR virtualization is enabled.
4209 */
4210 case MSR_IA32_DEBUGCTLMSR:
4211 msr_info->data = svm->vmcb->save.dbgctl;
4212 break;
4213 case MSR_IA32_LASTBRANCHFROMIP:
4214 msr_info->data = svm->vmcb->save.br_from;
4215 break;
4216 case MSR_IA32_LASTBRANCHTOIP:
4217 msr_info->data = svm->vmcb->save.br_to;
4218 break;
4219 case MSR_IA32_LASTINTFROMIP:
4220 msr_info->data = svm->vmcb->save.last_excp_from;
4221 break;
4222 case MSR_IA32_LASTINTTOIP:
4223 msr_info->data = svm->vmcb->save.last_excp_to;
4224 break;
4225 case MSR_VM_HSAVE_PA:
4226 msr_info->data = svm->nested.hsave_msr;
4227 break;
4228 case MSR_VM_CR:
4229 msr_info->data = svm->nested.vm_cr_msr;
4230 break;
4231 case MSR_IA32_SPEC_CTRL:
4232 if (!msr_info->host_initiated &&
4233 !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
4234 !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
4235 return 1;
4236
4237 msr_info->data = svm->spec_ctrl;
4238 break;
4239 case MSR_AMD64_VIRT_SPEC_CTRL:
4240 if (!msr_info->host_initiated &&
4241 !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
4242 return 1;
4243
4244 msr_info->data = svm->virt_spec_ctrl;
4245 break;
4246 case MSR_F15H_IC_CFG: {
4247
4248 int family, model;
4249
4250 family = guest_cpuid_family(vcpu);
4251 model = guest_cpuid_model(vcpu);
4252
4253 if (family < 0 || model < 0)
4254 return kvm_get_msr_common(vcpu, msr_info);
4255
4256 msr_info->data = 0;
4257
4258 if (family == 0x15 &&
4259 (model >= 0x2 && model < 0x20))
4260 msr_info->data = 0x1E;
4261 }
4262 break;
4263 case MSR_F10H_DECFG:
4264 msr_info->data = svm->msr_decfg;
4265 break;
4266 default:
4267 return kvm_get_msr_common(vcpu, msr_info);
4268 }
4269 return 0;
4270 }
4271
4272 static int rdmsr_interception(struct vcpu_svm *svm)
4273 {
4274 return kvm_emulate_rdmsr(&svm->vcpu);
4275 }
4276
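/*
 * Emulate writes to MSR_VM_CR: only bits in SVM_VM_CR_VALID_MASK may
 * change, the LOCK/SVMDIS bits become read-only once SVMDIS is set,
 * and SVMDIS cannot be set while the guest still has EFER.SVME
 * enabled.
 */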
4277 static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
4278 {
4279 struct vcpu_svm *svm = to_svm(vcpu);
4280 int svm_dis, chg_mask;
4281
4282 if (data & ~SVM_VM_CR_VALID_MASK)
4283 return 1;
4284
4285 chg_mask = SVM_VM_CR_VALID_MASK;
4286
4287 if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK)
4288 chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK);
4289
4290 svm->nested.vm_cr_msr &= ~chg_mask;
4291 svm->nested.vm_cr_msr |= (data & chg_mask);
4292
4293 svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK;
4294
4295
4296 if (svm_dis && (vcpu->arch.efer & EFER_SVME))
4297 return 1;
4298
4299 return 0;
4300 }
4301
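/*
 * Write a guest MSR.  Speculation-control MSRs (SPEC_CTRL, PRED_CMD,
 * VIRT_SPEC_CTRL) are only accepted when the corresponding CPUID bit
 * is exposed to the guest, and their intercepts are relaxed once the
 * guest actually uses them; unknown MSRs fall back to
 * kvm_set_msr_common().
 */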
4302 static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
4303 {
4304 struct vcpu_svm *svm = to_svm(vcpu);
4305
4306 u32 ecx = msr->index;
4307 u64 data = msr->data;
4308 switch (ecx) {
4309 case MSR_IA32_CR_PAT:
4310 if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
4311 return 1;
4312 vcpu->arch.pat = data;
4313 svm->vmcb->save.g_pat = data;
4314 mark_dirty(svm->vmcb, VMCB_NPT);
4315 break;
4316 case MSR_IA32_SPEC_CTRL:
4317 if (!msr->host_initiated &&
4318 !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
4319 !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
4320 return 1;
4321
4322
4323 if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
4324 return 1;
4325
4326 svm->spec_ctrl = data;
4327
4328 if (!data)
4329 break;
4330
4331
4332 /*
4333  * On the first non-zero write, stop intercepting
4334  * MSR_IA32_SPEC_CTRL so that subsequent guest accesses go
4335  * straight to hardware.
4336  *
4337  * For a nested guest the L2 MSR bitmap is merged in
4338  * nested_svm_vmrun_msrpm(); the L1 bit is updated here as well
4339  * since L1 ends up touching the MSR either way.
4340  */
4341
4342 set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL, 1, 1);
4343 break;
4344 case MSR_IA32_PRED_CMD:
4345 if (!msr->host_initiated &&
4346 !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB))
4347 return 1;
4348
4349 if (data & ~PRED_CMD_IBPB)
4350 return 1;
4351
4352 if (!data)
4353 break;
4354
4355 wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
4356 if (is_guest_mode(vcpu))
4357 break;
4358 set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
4359 break;
4360 case MSR_AMD64_VIRT_SPEC_CTRL:
4361 if (!msr->host_initiated &&
4362 !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
4363 return 1;
4364
4365 if (data & ~SPEC_CTRL_SSBD)
4366 return 1;
4367
4368 svm->virt_spec_ctrl = data;
4369 break;
4370 case MSR_STAR:
4371 svm->vmcb->save.star = data;
4372 break;
4373 #ifdef CONFIG_X86_64
4374 case MSR_LSTAR:
4375 svm->vmcb->save.lstar = data;
4376 break;
4377 case MSR_CSTAR:
4378 svm->vmcb->save.cstar = data;
4379 break;
4380 case MSR_KERNEL_GS_BASE:
4381 svm->vmcb->save.kernel_gs_base = data;
4382 break;
4383 case MSR_SYSCALL_MASK:
4384 svm->vmcb->save.sfmask = data;
4385 break;
4386 #endif
4387 case MSR_IA32_SYSENTER_CS:
4388 svm->vmcb->save.sysenter_cs = data;
4389 break;
4390 case MSR_IA32_SYSENTER_EIP:
4391 svm->sysenter_eip = data;
4392 svm->vmcb->save.sysenter_eip = data;
4393 break;
4394 case MSR_IA32_SYSENTER_ESP:
4395 svm->sysenter_esp = data;
4396 svm->vmcb->save.sysenter_esp = data;
4397 break;
4398 case MSR_TSC_AUX:
4399 if (!boot_cpu_has(X86_FEATURE_RDTSCP))
4400 return 1;
4401
4402 /*
4403  * TSC_AUX is rarely written, so update the hardware MSR here
4404  * instead of passing it through; passthrough would require
4405  * reading it back in svm_vcpu_put().
4406  */
4407 svm->tsc_aux = data;
4408 wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
4409 break;
4410 case MSR_IA32_DEBUGCTLMSR:
4411 if (!boot_cpu_has(X86_FEATURE_LBRV)) {
4412 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
4413 __func__, data);
4414 break;
4415 }
4416 if (data & DEBUGCTL_RESERVED_BITS)
4417 return 1;
4418
4419 svm->vmcb->save.dbgctl = data;
4420 mark_dirty(svm->vmcb, VMCB_LBR);
4421 if (data & (1ULL<<0))
4422 svm_enable_lbrv(svm);
4423 else
4424 svm_disable_lbrv(svm);
4425 break;
4426 case MSR_VM_HSAVE_PA:
4427 svm->nested.hsave_msr = data;
4428 break;
4429 case MSR_VM_CR:
4430 return svm_set_vm_cr(vcpu, data);
4431 case MSR_VM_IGNNE:
4432 vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
4433 break;
4434 case MSR_F10H_DECFG: {
4435 struct kvm_msr_entry msr_entry;
4436
4437 msr_entry.index = msr->index;
4438 if (svm_get_msr_feature(&msr_entry))
4439 return 1;
4440
4441
4442 if (data & ~msr_entry.data)
4443 return 1;
4444
4445
4446 if (!msr->host_initiated && (data ^ msr_entry.data))
4447 return 1;
4448
4449 svm->msr_decfg = data;
4450 break;
4451 }
4452 case MSR_IA32_APICBASE:
4453 if (kvm_vcpu_apicv_active(vcpu))
4454 avic_update_vapic_bar(to_svm(vcpu), data);
4455 /* Fall through - unhandled bits are passed to kvm_set_msr_common(). */
4456 default:
4457 return kvm_set_msr_common(vcpu, msr);
4458 }
4459 return 0;
4460 }
4461
4462 static int wrmsr_interception(struct vcpu_svm *svm)
4463 {
4464 return kvm_emulate_wrmsr(&svm->vcpu);
4465 }
4466
4467 static int msr_interception(struct vcpu_svm *svm)
4468 {
4469 if (svm->vmcb->control.exit_info_1)
4470 return wrmsr_interception(svm);
4471 else
4472 return rdmsr_interception(svm);
4473 }
4474
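/*
 * A VINTR exit means the interrupt window opened: drop the fake
 * virtual interrupt that was used to request the window and let
 * KVM_REQ_EVENT inject the real pending interrupt.
 */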
4475 static int interrupt_window_interception(struct vcpu_svm *svm)
4476 {
4477 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
4478 svm_clear_vintr(svm);
4479 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
4480 mark_dirty(svm->vmcb, VMCB_INTR);
4481 ++svm->vcpu.stat.irq_window_exits;
4482 return 1;
4483 }
4484
4485 static int pause_interception(struct vcpu_svm *svm)
4486 {
4487 struct kvm_vcpu *vcpu = &svm->vcpu;
4488 bool in_kernel = (svm_get_cpl(vcpu) == 0);
4489
4490 if (pause_filter_thresh)
4491 grow_ple_window(vcpu);
4492
4493 kvm_vcpu_on_spin(vcpu, in_kernel);
4494 return 1;
4495 }
4496
4497 static int nop_interception(struct vcpu_svm *svm)
4498 {
4499 return kvm_skip_emulated_instruction(&(svm->vcpu));
4500 }
4501
4502 static int monitor_interception(struct vcpu_svm *svm)
4503 {
4504 printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n");
4505 return nop_interception(svm);
4506 }
4507
4508 static int mwait_interception(struct vcpu_svm *svm)
4509 {
4510 printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n");
4511 return nop_interception(svm);
4512 }
4513
4514 enum avic_ipi_failure_cause {
4515 AVIC_IPI_FAILURE_INVALID_INT_TYPE,
4516 AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
4517 AVIC_IPI_FAILURE_INVALID_TARGET,
4518 AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
4519 };
4520
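/*
 * AVIC signalled that it could not deliver an IPI in hardware.
 * Depending on the failure reason KVM either emulates the ICR write
 * or wakes up target vCPUs that AVIC marked as not running; invalid
 * targets and backing pages are reported with WARN_ONCE.
 */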
4521 static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
4522 {
4523 u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
4524 u32 icrl = svm->vmcb->control.exit_info_1;
4525 u32 id = svm->vmcb->control.exit_info_2 >> 32;
4526 u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
4527 struct kvm_lapic *apic = svm->vcpu.arch.apic;
4528
4529 trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index);
4530
4531 switch (id) {
4532 case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
4533
4534
4535
4536
4537
4538
4539
4540
4541
4542
4543
4544 kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
4545 kvm_lapic_reg_write(apic, APIC_ICR, icrl);
4546 break;
4547 case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
4548 int i;
4549 struct kvm_vcpu *vcpu;
4550 struct kvm *kvm = svm->vcpu.kvm;
4551 struct kvm_lapic *apic = svm->vcpu.arch.apic;
4552
4553
4554
4555
4556
4557
4558 kvm_for_each_vcpu(i, vcpu, kvm) {
4559 bool m = kvm_apic_match_dest(vcpu, apic,
4560 icrl & KVM_APIC_SHORT_MASK,
4561 GET_APIC_DEST_FIELD(icrh),
4562 icrl & KVM_APIC_DEST_MASK);
4563
4564 if (m && !avic_vcpu_is_running(vcpu))
4565 kvm_vcpu_wake_up(vcpu);
4566 }
4567 break;
4568 }
4569 case AVIC_IPI_FAILURE_INVALID_TARGET:
4570 WARN_ONCE(1, "Invalid IPI target: index=%u, vcpu=%d, icr=%#0x:%#0x\n",
4571 index, svm->vcpu.vcpu_id, icrh, icrl);
4572 break;
4573 case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
4574 WARN_ONCE(1, "Invalid backing page\n");
4575 break;
4576 default:
4577 pr_err("Unknown IPI interception\n");
4578 }
4579
4580 return 1;
4581 }
4582
4583 static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
4584 {
4585 struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
4586 int index;
4587 u32 *logical_apic_id_table;
4588 int dlid = GET_APIC_LOGICAL_ID(ldr);
4589
4590 if (!dlid)
4591 return NULL;
4592
4593 if (flat) {
4594 index = ffs(dlid) - 1;
4595 if (index > 7)
4596 return NULL;
4597 } else {
4598 int cluster = (dlid & 0xf0) >> 4;
4599 int apic = ffs(dlid & 0x0f) - 1;
4600
4601 if ((apic < 0) || (apic > 7) ||
4602 (cluster >= 0xf))
4603 return NULL;
4604 index = (cluster << 2) + apic;
4605 }
4606
4607 logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page);
4608
4609 return &logical_apic_id_table[index];
4610 }
4611
4612 static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
4613 {
4614 bool flat;
4615 u32 *entry, new_entry;
4616
4617 flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT;
4618 entry = avic_get_logical_id_entry(vcpu, ldr, flat);
4619 if (!entry)
4620 return -EINVAL;
4621
4622 new_entry = READ_ONCE(*entry);
4623 new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
4624 new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
4625 new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
4626 WRITE_ONCE(*entry, new_entry);
4627
4628 return 0;
4629 }
4630
4631 static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu)
4632 {
4633 struct vcpu_svm *svm = to_svm(vcpu);
4634 bool flat = svm->dfr_reg == APIC_DFR_FLAT;
4635 u32 *entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat);
4636
4637 if (entry)
4638 clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry);
4639 }
4640
4641 static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
4642 {
4643 int ret = 0;
4644 struct vcpu_svm *svm = to_svm(vcpu);
4645 u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
4646 u32 id = kvm_xapic_id(vcpu->arch.apic);
4647
4648 if (ldr == svm->ldr_reg)
4649 return 0;
4650
4651 avic_invalidate_logical_id_entry(vcpu);
4652
4653 if (ldr)
4654 ret = avic_ldr_write(vcpu, id, ldr);
4655
4656 if (!ret)
4657 svm->ldr_reg = ldr;
4658
4659 return ret;
4660 }
4661
4662 static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
4663 {
4664 u64 *old, *new;
4665 struct vcpu_svm *svm = to_svm(vcpu);
4666 u32 id = kvm_xapic_id(vcpu->arch.apic);
4667
4668 if (vcpu->vcpu_id == id)
4669 return 0;
4670
4671 old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id);
4672 new = avic_get_physical_id_entry(vcpu, id);
4673 if (!new || !old)
4674 return 1;
4675
4676
4677 *new = *old;
4678 *old = 0ULL;
4679 to_svm(vcpu)->avic_physical_id_cache = new;
4680
4681
4682
4683
4684
4685 if (svm->ldr_reg)
4686 avic_handle_ldr_update(vcpu);
4687
4688 return 0;
4689 }
4690
4691 static void avic_handle_dfr_update(struct kvm_vcpu *vcpu)
4692 {
4693 struct vcpu_svm *svm = to_svm(vcpu);
4694 u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);
4695
4696 if (svm->dfr_reg == dfr)
4697 return;
4698
4699 avic_invalidate_logical_id_entry(vcpu);
4700 svm->dfr_reg = dfr;
4701 }
4702
4703 static int avic_unaccel_trap_write(struct vcpu_svm *svm)
4704 {
4705 struct kvm_lapic *apic = svm->vcpu.arch.apic;
4706 u32 offset = svm->vmcb->control.exit_info_1 &
4707 AVIC_UNACCEL_ACCESS_OFFSET_MASK;
4708
4709 switch (offset) {
4710 case APIC_ID:
4711 if (avic_handle_apic_id_update(&svm->vcpu))
4712 return 0;
4713 break;
4714 case APIC_LDR:
4715 if (avic_handle_ldr_update(&svm->vcpu))
4716 return 0;
4717 break;
4718 case APIC_DFR:
4719 avic_handle_dfr_update(&svm->vcpu);
4720 break;
4721 default:
4722 break;
4723 }
4724
4725 kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
4726
4727 return 1;
4728 }
4729
4730 static bool is_avic_unaccelerated_access_trap(u32 offset)
4731 {
4732 bool ret = false;
4733
4734 switch (offset) {
4735 case APIC_ID:
4736 case APIC_EOI:
4737 case APIC_RRR:
4738 case APIC_LDR:
4739 case APIC_DFR:
4740 case APIC_SPIV:
4741 case APIC_ESR:
4742 case APIC_ICR:
4743 case APIC_LVTT:
4744 case APIC_LVTTHMR:
4745 case APIC_LVTPC:
4746 case APIC_LVT0:
4747 case APIC_LVT1:
4748 case APIC_LVTERR:
4749 case APIC_TMICT:
4750 case APIC_TDCR:
4751 ret = true;
4752 break;
4753 default:
4754 break;
4755 }
4756 return ret;
4757 }
4758
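/*
 * Unaccelerated APIC register access under AVIC.  Trap-style exits
 * (the write already reached the backing page) only need KVM's APIC
 * state refreshed via avic_unaccel_trap_write(); fault-style exits
 * are forwarded to the instruction emulator.
 */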
4759 static int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
4760 {
4761 int ret = 0;
4762 u32 offset = svm->vmcb->control.exit_info_1 &
4763 AVIC_UNACCEL_ACCESS_OFFSET_MASK;
4764 u32 vector = svm->vmcb->control.exit_info_2 &
4765 AVIC_UNACCEL_ACCESS_VECTOR_MASK;
4766 bool write = (svm->vmcb->control.exit_info_1 >> 32) &
4767 AVIC_UNACCEL_ACCESS_WRITE_MASK;
4768 bool trap = is_avic_unaccelerated_access_trap(offset);
4769
4770 trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset,
4771 trap, write, vector);
4772 if (trap) {
4773
4774 WARN_ONCE(!write, "svm: Handling trap read.\n");
4775 ret = avic_unaccel_trap_write(svm);
4776 } else {
4777
4778 ret = kvm_emulate_instruction(&svm->vcpu, 0);
4779 }
4780
4781 return ret;
4782 }
4783
4784 static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
4785 [SVM_EXIT_READ_CR0] = cr_interception,
4786 [SVM_EXIT_READ_CR3] = cr_interception,
4787 [SVM_EXIT_READ_CR4] = cr_interception,
4788 [SVM_EXIT_READ_CR8] = cr_interception,
4789 [SVM_EXIT_CR0_SEL_WRITE] = cr_interception,
4790 [SVM_EXIT_WRITE_CR0] = cr_interception,
4791 [SVM_EXIT_WRITE_CR3] = cr_interception,
4792 [SVM_EXIT_WRITE_CR4] = cr_interception,
4793 [SVM_EXIT_WRITE_CR8] = cr8_write_interception,
4794 [SVM_EXIT_READ_DR0] = dr_interception,
4795 [SVM_EXIT_READ_DR1] = dr_interception,
4796 [SVM_EXIT_READ_DR2] = dr_interception,
4797 [SVM_EXIT_READ_DR3] = dr_interception,
4798 [SVM_EXIT_READ_DR4] = dr_interception,
4799 [SVM_EXIT_READ_DR5] = dr_interception,
4800 [SVM_EXIT_READ_DR6] = dr_interception,
4801 [SVM_EXIT_READ_DR7] = dr_interception,
4802 [SVM_EXIT_WRITE_DR0] = dr_interception,
4803 [SVM_EXIT_WRITE_DR1] = dr_interception,
4804 [SVM_EXIT_WRITE_DR2] = dr_interception,
4805 [SVM_EXIT_WRITE_DR3] = dr_interception,
4806 [SVM_EXIT_WRITE_DR4] = dr_interception,
4807 [SVM_EXIT_WRITE_DR5] = dr_interception,
4808 [SVM_EXIT_WRITE_DR6] = dr_interception,
4809 [SVM_EXIT_WRITE_DR7] = dr_interception,
4810 [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception,
4811 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
4812 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
4813 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
4814 [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
4815 [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception,
4816 [SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception,
4817 [SVM_EXIT_INTR] = intr_interception,
4818 [SVM_EXIT_NMI] = nmi_interception,
4819 [SVM_EXIT_SMI] = nop_on_interception,
4820 [SVM_EXIT_INIT] = nop_on_interception,
4821 [SVM_EXIT_VINTR] = interrupt_window_interception,
4822 [SVM_EXIT_RDPMC] = rdpmc_interception,
4823 [SVM_EXIT_CPUID] = cpuid_interception,
4824 [SVM_EXIT_IRET] = iret_interception,
4825 [SVM_EXIT_INVD] = emulate_on_interception,
4826 [SVM_EXIT_PAUSE] = pause_interception,
4827 [SVM_EXIT_HLT] = halt_interception,
4828 [SVM_EXIT_INVLPG] = invlpg_interception,
4829 [SVM_EXIT_INVLPGA] = invlpga_interception,
4830 [SVM_EXIT_IOIO] = io_interception,
4831 [SVM_EXIT_MSR] = msr_interception,
4832 [SVM_EXIT_TASK_SWITCH] = task_switch_interception,
4833 [SVM_EXIT_SHUTDOWN] = shutdown_interception,
4834 [SVM_EXIT_VMRUN] = vmrun_interception,
4835 [SVM_EXIT_VMMCALL] = vmmcall_interception,
4836 [SVM_EXIT_VMLOAD] = vmload_interception,
4837 [SVM_EXIT_VMSAVE] = vmsave_interception,
4838 [SVM_EXIT_STGI] = stgi_interception,
4839 [SVM_EXIT_CLGI] = clgi_interception,
4840 [SVM_EXIT_SKINIT] = skinit_interception,
4841 [SVM_EXIT_WBINVD] = wbinvd_interception,
4842 [SVM_EXIT_MONITOR] = monitor_interception,
4843 [SVM_EXIT_MWAIT] = mwait_interception,
4844 [SVM_EXIT_XSETBV] = xsetbv_interception,
4845 [SVM_EXIT_RDPRU] = rdpru_interception,
4846 [SVM_EXIT_NPF] = npf_interception,
4847 [SVM_EXIT_RSM] = rsm_interception,
4848 [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception,
4849 [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception,
4850 };
4851
4852 static void dump_vmcb(struct kvm_vcpu *vcpu)
4853 {
4854 struct vcpu_svm *svm = to_svm(vcpu);
4855 struct vmcb_control_area *control = &svm->vmcb->control;
4856 struct vmcb_save_area *save = &svm->vmcb->save;
4857
4858 if (!dump_invalid_vmcb) {
4859 pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
4860 return;
4861 }
4862
4863 pr_err("VMCB Control Area:\n");
4864 pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff);
4865 pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16);
4866 pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff);
4867 pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16);
4868 pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions);
4869 pr_err("%-20s%016llx\n", "intercepts:", control->intercept);
4870 pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count);
4871 pr_err("%-20s%d\n", "pause filter threshold:",
4872 control->pause_filter_thresh);
4873 pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa);
4874 pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa);
4875 pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset);
4876 pr_err("%-20s%d\n", "asid:", control->asid);
4877 pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl);
4878 pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl);
4879 pr_err("%-20s%08x\n", "int_vector:", control->int_vector);
4880 pr_err("%-20s%08x\n", "int_state:", control->int_state);
4881 pr_err("%-20s%08x\n", "exit_code:", control->exit_code);
4882 pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1);
4883 pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2);
4884 pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info);
4885 pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
4886 pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
4887 pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
4888 pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
4889 pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
4890 pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
4891 pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext);
4892 pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
4893 pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page);
4894 pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id);
4895 pr_err("%-20s%016llx\n", "avic_physical_id:", control->avic_physical_id);
4896 pr_err("VMCB State Save Area:\n");
4897 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4898 "es:",
4899 save->es.selector, save->es.attrib,
4900 save->es.limit, save->es.base);
4901 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4902 "cs:",
4903 save->cs.selector, save->cs.attrib,
4904 save->cs.limit, save->cs.base);
4905 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4906 "ss:",
4907 save->ss.selector, save->ss.attrib,
4908 save->ss.limit, save->ss.base);
4909 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4910 "ds:",
4911 save->ds.selector, save->ds.attrib,
4912 save->ds.limit, save->ds.base);
4913 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4914 "fs:",
4915 save->fs.selector, save->fs.attrib,
4916 save->fs.limit, save->fs.base);
4917 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4918 "gs:",
4919 save->gs.selector, save->gs.attrib,
4920 save->gs.limit, save->gs.base);
4921 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4922 "gdtr:",
4923 save->gdtr.selector, save->gdtr.attrib,
4924 save->gdtr.limit, save->gdtr.base);
4925 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4926 "ldtr:",
4927 save->ldtr.selector, save->ldtr.attrib,
4928 save->ldtr.limit, save->ldtr.base);
4929 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4930 "idtr:",
4931 save->idtr.selector, save->idtr.attrib,
4932 save->idtr.limit, save->idtr.base);
4933 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
4934 "tr:",
4935 save->tr.selector, save->tr.attrib,
4936 save->tr.limit, save->tr.base);
4937 pr_err("cpl: %d efer: %016llx\n",
4938 save->cpl, save->efer);
4939 pr_err("%-15s %016llx %-13s %016llx\n",
4940 "cr0:", save->cr0, "cr2:", save->cr2);
4941 pr_err("%-15s %016llx %-13s %016llx\n",
4942 "cr3:", save->cr3, "cr4:", save->cr4);
4943 pr_err("%-15s %016llx %-13s %016llx\n",
4944 "dr6:", save->dr6, "dr7:", save->dr7);
4945 pr_err("%-15s %016llx %-13s %016llx\n",
4946 "rip:", save->rip, "rflags:", save->rflags);
4947 pr_err("%-15s %016llx %-13s %016llx\n",
4948 "rsp:", save->rsp, "rax:", save->rax);
4949 pr_err("%-15s %016llx %-13s %016llx\n",
4950 "star:", save->star, "lstar:", save->lstar);
4951 pr_err("%-15s %016llx %-13s %016llx\n",
4952 "cstar:", save->cstar, "sfmask:", save->sfmask);
4953 pr_err("%-15s %016llx %-13s %016llx\n",
4954 "kernel_gs_base:", save->kernel_gs_base,
4955 "sysenter_cs:", save->sysenter_cs);
4956 pr_err("%-15s %016llx %-13s %016llx\n",
4957 "sysenter_esp:", save->sysenter_esp,
4958 "sysenter_eip:", save->sysenter_eip);
4959 pr_err("%-15s %016llx %-13s %016llx\n",
4960 "gpat:", save->g_pat, "dbgctl:", save->dbgctl);
4961 pr_err("%-15s %016llx %-13s %016llx\n",
4962 "br_from:", save->br_from, "br_to:", save->br_to);
4963 pr_err("%-15s %016llx %-13s %016llx\n",
4964 "excp_from:", save->last_excp_from,
4965 "excp_to:", save->last_excp_to);
4966 }
4967
4968 static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
4969 {
4970 struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
4971
4972 *info1 = control->exit_info_1;
4973 *info2 = control->exit_info_2;
4974 }
4975
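/*
 * Top-level #VMEXIT dispatch.  Nested exits are offered to L1 first;
 * anything left for L0 is routed through svm_exit_handlers[], with
 * unknown exit codes reported as KVM_EXIT_INTERNAL_ERROR.
 */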
4976 static int handle_exit(struct kvm_vcpu *vcpu)
4977 {
4978 struct vcpu_svm *svm = to_svm(vcpu);
4979 struct kvm_run *kvm_run = vcpu->run;
4980 u32 exit_code = svm->vmcb->control.exit_code;
4981
4982 trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
4983
4984 if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
4985 vcpu->arch.cr0 = svm->vmcb->save.cr0;
4986 if (npt_enabled)
4987 vcpu->arch.cr3 = svm->vmcb->save.cr3;
4988
4989 if (unlikely(svm->nested.exit_required)) {
4990 nested_svm_vmexit(svm);
4991 svm->nested.exit_required = false;
4992
4993 return 1;
4994 }
4995
4996 if (is_guest_mode(vcpu)) {
4997 int vmexit;
4998
4999 trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
5000 svm->vmcb->control.exit_info_1,
5001 svm->vmcb->control.exit_info_2,
5002 svm->vmcb->control.exit_int_info,
5003 svm->vmcb->control.exit_int_info_err,
5004 KVM_ISA_SVM);
5005
5006 vmexit = nested_svm_exit_special(svm);
5007
5008 if (vmexit == NESTED_EXIT_CONTINUE)
5009 vmexit = nested_svm_exit_handled(svm);
5010
5011 if (vmexit == NESTED_EXIT_DONE)
5012 return 1;
5013 }
5014
5015 svm_complete_interrupts(svm);
5016
5017 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
5018 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
5019 kvm_run->fail_entry.hardware_entry_failure_reason
5020 = svm->vmcb->control.exit_code;
5021 dump_vmcb(vcpu);
5022 return 0;
5023 }
5024
5025 if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
5026 exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
5027 exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
5028 exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
5029 printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
5030 "exit_code 0x%x\n",
5031 __func__, svm->vmcb->control.exit_int_info,
5032 exit_code);
5033
5034 if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
5035 || !svm_exit_handlers[exit_code]) {
5036 vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
5037 dump_vmcb(vcpu);
5038 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
5039 vcpu->run->internal.suberror =
5040 KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
5041 vcpu->run->internal.ndata = 1;
5042 vcpu->run->internal.data[0] = exit_code;
5043 return 0;
5044 }
5045
5046 return svm_exit_handlers[exit_code](svm);
5047 }
5048
5049 static void reload_tss(struct kvm_vcpu *vcpu)
5050 {
5051 int cpu = raw_smp_processor_id();
5052
5053 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
5054 sd->tss_desc->type = 9;
5055 load_TR_desc();
5056 }
5057
5058 static void pre_sev_run(struct vcpu_svm *svm, int cpu)
5059 {
5060 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
5061 int asid = sev_get_asid(svm->vcpu.kvm);
5062
5063
5064 svm->vmcb->control.asid = asid;
5065
5066 /*
5067  * Flush the guest TLB when a different VMCB is run with the
5068  * same ASID on this CPU, or when this VMCB last ran on a
5069  * different host CPU.
5070  */
5071
5072 if (sd->sev_vmcbs[asid] == svm->vmcb &&
5073 svm->last_cpu == cpu)
5074 return;
5075
5076 svm->last_cpu = cpu;
5077 sd->sev_vmcbs[asid] = svm->vmcb;
5078 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
5079 mark_dirty(svm->vmcb, VMCB_ASID);
5080 }
5081
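/*
 * Per-run bookkeeping before VMRUN: SEV guests get their fixed ASID
 * (and a TLB flush when the VMCB migrated, see pre_sev_run above),
 * ordinary guests get a new ASID whenever the per-CPU ASID generation
 * changed.
 */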
5082 static void pre_svm_run(struct vcpu_svm *svm)
5083 {
5084 int cpu = raw_smp_processor_id();
5085
5086 struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
5087
5088 if (sev_guest(svm->vcpu.kvm))
5089 return pre_sev_run(svm, cpu);
5090
5091
5092 if (svm->asid_generation != sd->asid_generation)
5093 new_asid(svm, sd);
5094 }
5095
5096 static void svm_inject_nmi(struct kvm_vcpu *vcpu)
5097 {
5098 struct vcpu_svm *svm = to_svm(vcpu);
5099
5100 svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
5101 vcpu->arch.hflags |= HF_NMI_MASK;
5102 set_intercept(svm, INTERCEPT_IRET);
5103 ++vcpu->stat.nmi_injections;
5104 }
5105
5106 static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
5107 {
5108 struct vmcb_control_area *control;
5109
5110
5111 control = &svm->vmcb->control;
5112 control->int_vector = irq;
5113 control->int_ctl &= ~V_INTR_PRIO_MASK;
5114 control->int_ctl |= V_IRQ_MASK |
5115 ((0xf) << V_INTR_PRIO_SHIFT);
5116 mark_dirty(svm->vmcb, VMCB_INTR);
5117 }
5118
5119 static void svm_set_irq(struct kvm_vcpu *vcpu)
5120 {
5121 struct vcpu_svm *svm = to_svm(vcpu);
5122
5123 BUG_ON(!(gif_set(svm)));
5124
5125 trace_kvm_inj_virq(vcpu->arch.interrupt.nr);
5126 ++vcpu->stat.irq_injections;
5127
5128 svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
5129 SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
5130 }
5131
5132 static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu)
5133 {
5134 return is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK);
5135 }
5136
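/*
 * Intercept CR8 writes only while the TPR is masking the highest
 * pending interrupt, so lowering the TPR is guaranteed to cause an
 * exit; with a nested vTPR or AVIC active the intercept is left
 * untouched.
 */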
5137 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
5138 {
5139 struct vcpu_svm *svm = to_svm(vcpu);
5140
5141 if (svm_nested_virtualize_tpr(vcpu) ||
5142 kvm_vcpu_apicv_active(vcpu))
5143 return;
5144
5145 clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
5146
5147 if (irr == -1)
5148 return;
5149
5150 if (tpr >= irr)
5151 set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
5152 }
5153
5154 static void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
5155 {
5156 return;
5157 }
5158
5159 static bool svm_get_enable_apicv(struct kvm_vcpu *vcpu)
5160 {
5161 return avic && irqchip_split(vcpu->kvm);
5162 }
5163
5164 static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
5165 {
5166 }
5167
5168 static void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
5169 {
5170 }
5171
5172
5173 static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
5174 {
5175 struct vcpu_svm *svm = to_svm(vcpu);
5176 struct vmcb *vmcb = svm->vmcb;
5177
5178 if (kvm_vcpu_apicv_active(vcpu))
5179 vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
5180 else
5181 vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
5182 mark_dirty(vmcb, VMCB_AVIC);
5183 }
5184
5185 static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
5186 {
5187 return;
5188 }
5189
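/*
 * Post an interrupt through AVIC: set the vector in the vAPIC IRR and
 * ring the doorbell if the target vCPU is currently running on
 * another CPU, otherwise wake the vCPU up.  Returns -1 when APICv is
 * inactive so the caller falls back to normal injection.
 */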
5190 static int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
5191 {
5192 if (!vcpu->arch.apicv_active)
5193 return -1;
5194
5195 kvm_lapic_set_irr(vec, vcpu->arch.apic);
5196 smp_mb__after_atomic();
5197
5198 if (avic_vcpu_is_running(vcpu)) {
5199 int cpuid = vcpu->cpu;
5200
5201 if (cpuid != get_cpu())
5202 wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpuid));
5203 put_cpu();
5204 } else
5205 kvm_vcpu_wake_up(vcpu);
5206
5207 return 0;
5208 }
5209
5210 static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
5211 {
5212 return false;
5213 }
5214
5215 static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
5216 {
5217 unsigned long flags;
5218 struct amd_svm_iommu_ir *cur;
5219
5220 spin_lock_irqsave(&svm->ir_list_lock, flags);
5221 list_for_each_entry(cur, &svm->ir_list, node) {
5222 if (cur->data != pi->ir_data)
5223 continue;
5224 list_del(&cur->node);
5225 kfree(cur);
5226 break;
5227 }
5228 spin_unlock_irqrestore(&svm->ir_list_lock, flags);
5229 }
5230
5231 static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
5232 {
5233 int ret = 0;
5234 unsigned long flags;
5235 struct amd_svm_iommu_ir *ir;
5236
5237
5238
5239
5240
5241
5242 if (pi->ir_data && (pi->prev_ga_tag != 0)) {
5243 struct kvm *kvm = svm->vcpu.kvm;
5244 u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
5245 struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
5246 struct vcpu_svm *prev_svm;
5247
5248 if (!prev_vcpu) {
5249 ret = -EINVAL;
5250 goto out;
5251 }
5252
5253 prev_svm = to_svm(prev_vcpu);
5254 svm_ir_list_del(prev_svm, pi);
5255 }
5256
5257
5258
5259
5260
5261 ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT);
5262 if (!ir) {
5263 ret = -ENOMEM;
5264 goto out;
5265 }
5266 ir->data = pi->ir_data;
5267
5268 spin_lock_irqsave(&svm->ir_list_lock, flags);
5269 list_add(&ir->node, &svm->ir_list);
5270 spin_unlock_irqrestore(&svm->ir_list_lock, flags);
5271 out:
5272 return ret;
5273 }
5274
5275
5276
5277
5278
5279
5280
5281
5282
5283
5284
5285
5286 static int
5287 get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
5288 struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
5289 {
5290 struct kvm_lapic_irq irq;
5291 struct kvm_vcpu *vcpu = NULL;
5292
5293 kvm_set_msi_irq(kvm, e, &irq);
5294
5295 if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
5296 !kvm_irq_is_postable(&irq)) {
5297 pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
5298 __func__, irq.vector);
5299 return -1;
5300 }
5301
5302 pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
5303 irq.vector);
5304 *svm = to_svm(vcpu);
5305 vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page));
5306 vcpu_info->vector = irq.vector;
5307
5308 return 0;
5309 }
5310
5311
5312
5313
5314
5315
5316
5317
5318
5319
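/*
 * Update the IOMMU interrupt-remapping entry for a (potentially)
 * posted device interrupt.  When the MSI can be targeted at a single
 * vCPU with APICv active, the IRTE is switched to guest (GA) mode and
 * tracked on that vCPU's ir_list; otherwise it is put back into
 * legacy remapping mode.
 */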
5320 static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
5321 uint32_t guest_irq, bool set)
5322 {
5323 struct kvm_kernel_irq_routing_entry *e;
5324 struct kvm_irq_routing_table *irq_rt;
5325 int idx, ret = -EINVAL;
5326
5327 if (!kvm_arch_has_assigned_device(kvm) ||
5328 !irq_remapping_cap(IRQ_POSTING_CAP))
5329 return 0;
5330
5331 pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
5332 __func__, host_irq, guest_irq, set);
5333
5334 idx = srcu_read_lock(&kvm->irq_srcu);
5335 irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
5336 WARN_ON(guest_irq >= irq_rt->nr_rt_entries);
5337
5338 hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
5339 struct vcpu_data vcpu_info;
5340 struct vcpu_svm *svm = NULL;
5341
5342 if (e->type != KVM_IRQ_ROUTING_MSI)
5343 continue;
5344
5345
5346
5347
5348
5349
5350
5351
5352 if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
5353 kvm_vcpu_apicv_active(&svm->vcpu)) {
5354 struct amd_iommu_pi_data pi;
5355
5356
5357 pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
5358 AVIC_HPA_MASK);
5359 pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id,
5360 svm->vcpu.vcpu_id);
5361 pi.is_guest_mode = true;
5362 pi.vcpu_data = &vcpu_info;
5363 ret = irq_set_vcpu_affinity(host_irq, &pi);
5364
5365
5366
5367
5368
5369
5370
5371
5372 if (!ret && pi.is_guest_mode)
5373 svm_ir_list_add(svm, &pi);
5374 } else {
5375
5376 struct amd_iommu_pi_data pi;
5377
5378
5379
5380
5381
5382
5383 pi.is_guest_mode = false;
5384 ret = irq_set_vcpu_affinity(host_irq, &pi);
5385
5386
5387
5388
5389
5390
5391
5392 if (!ret && pi.prev_ga_tag) {
5393 int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
5394 struct kvm_vcpu *vcpu;
5395
5396 vcpu = kvm_get_vcpu_by_id(kvm, id);
5397 if (vcpu)
5398 svm_ir_list_del(to_svm(vcpu), &pi);
5399 }
5400 }
5401
5402 if (!ret && svm) {
5403 trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id,
5404 e->gsi, vcpu_info.vector,
5405 vcpu_info.pi_desc_addr, set);
5406 }
5407
5408 if (ret < 0) {
5409 pr_err("%s: failed to update PI IRTE\n", __func__);
5410 goto out;
5411 }
5412 }
5413
5414 ret = 0;
5415 out:
5416 srcu_read_unlock(&kvm->irq_srcu, idx);
5417 return ret;
5418 }
5419
5420 static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
5421 {
5422 struct vcpu_svm *svm = to_svm(vcpu);
5423 struct vmcb *vmcb = svm->vmcb;
5424 int ret;
5425 ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
5426 !(svm->vcpu.arch.hflags & HF_NMI_MASK);
5427 ret = ret && gif_set(svm) && nested_svm_nmi(svm);
5428
5429 return ret;
5430 }
5431
5432 static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
5433 {
5434 struct vcpu_svm *svm = to_svm(vcpu);
5435
5436 return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
5437 }
5438
5439 static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
5440 {
5441 struct vcpu_svm *svm = to_svm(vcpu);
5442
5443 if (masked) {
5444 svm->vcpu.arch.hflags |= HF_NMI_MASK;
5445 set_intercept(svm, INTERCEPT_IRET);
5446 } else {
5447 svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
5448 clr_intercept(svm, INTERCEPT_IRET);
5449 }
5450 }
5451
5452 static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
5453 {
5454 struct vcpu_svm *svm = to_svm(vcpu);
5455 struct vmcb *vmcb = svm->vmcb;
5456 int ret;
5457
5458 if (!gif_set(svm) ||
5459 (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
5460 return 0;
5461
5462 ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF);
5463
5464 if (is_guest_mode(vcpu))
5465 return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
5466
5467 return ret;
5468 }
5469
5470 static void enable_irq_window(struct kvm_vcpu *vcpu)
5471 {
5472 struct vcpu_svm *svm = to_svm(vcpu);
5473
5474 if (kvm_vcpu_apicv_active(vcpu))
5475 return;
5476
5477
5478
5479
5480
5481
5482
5483
5484
5485 if ((vgif_enabled(svm) || gif_set(svm)) && nested_svm_intr(svm)) {
5486 svm_set_vintr(svm);
5487 svm_inject_irq(svm, 0x0);
5488 }
5489 }
5490
5491 static void enable_nmi_window(struct kvm_vcpu *vcpu)
5492 {
5493 struct vcpu_svm *svm = to_svm(vcpu);
5494
5495 if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
5496 == HF_NMI_MASK)
5497 return;
5498
5499 if (!gif_set(svm)) {
5500 if (vgif_enabled(svm))
5501 set_intercept(svm, INTERCEPT_STGI);
5502 return;
5503 }
5504
5505 if (svm->nested.exit_required)
5506 return;
5507
5508
5509
5510
5511
5512 svm->nmi_singlestep_guest_rflags = svm_get_rflags(vcpu);
5513 svm->nmi_singlestep = true;
5514 svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
5515 }
5516
5517 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
5518 {
5519 return 0;
5520 }
5521
5522 static int svm_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
5523 {
5524 return 0;
5525 }
5526
5527 static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
5528 {
5529 struct vcpu_svm *svm = to_svm(vcpu);
5530
5531 if (static_cpu_has(X86_FEATURE_FLUSHBYASID))
5532 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
5533 else
5534 svm->asid_generation--;
5535 }
5536
5537 static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
5538 {
5539 struct vcpu_svm *svm = to_svm(vcpu);
5540
5541 invlpga(gva, svm->vmcb->control.asid);
5542 }
5543
5544 static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
5545 {
5546 }
5547
5548 static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
5549 {
5550 struct vcpu_svm *svm = to_svm(vcpu);
5551
5552 if (svm_nested_virtualize_tpr(vcpu))
5553 return;
5554
5555 if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) {
5556 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
5557 kvm_set_cr8(vcpu, cr8);
5558 }
5559 }
5560
5561 static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
5562 {
5563 struct vcpu_svm *svm = to_svm(vcpu);
5564 u64 cr8;
5565
5566 if (svm_nested_virtualize_tpr(vcpu) ||
5567 kvm_vcpu_apicv_active(vcpu))
5568 return;
5569
5570 cr8 = kvm_get_cr8(vcpu);
5571 svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
5572 svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
5573 }
5574
5575 static void svm_complete_interrupts(struct vcpu_svm *svm)
5576 {
5577 u8 vector;
5578 int type;
5579 u32 exitintinfo = svm->vmcb->control.exit_int_info;
5580 unsigned int3_injected = svm->int3_injected;
5581
5582 svm->int3_injected = 0;
5583
5584
5585
5586
5587
5588 if ((svm->vcpu.arch.hflags & HF_IRET_MASK)
5589 && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) {
5590 svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
5591 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
5592 }
5593
5594 svm->vcpu.arch.nmi_injected = false;
5595 kvm_clear_exception_queue(&svm->vcpu);
5596 kvm_clear_interrupt_queue(&svm->vcpu);
5597
5598 if (!(exitintinfo & SVM_EXITINTINFO_VALID))
5599 return;
5600
5601 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
5602
5603 vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
5604 type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
5605
5606 switch (type) {
5607 case SVM_EXITINTINFO_TYPE_NMI:
5608 svm->vcpu.arch.nmi_injected = true;
5609 break;
5610 case SVM_EXITINTINFO_TYPE_EXEPT:
5611
5612
5613
5614
5615
5616 if (kvm_exception_is_soft(vector)) {
5617 if (vector == BP_VECTOR && int3_injected &&
5618 kvm_is_linear_rip(&svm->vcpu, svm->int3_rip))
5619 kvm_rip_write(&svm->vcpu,
5620 kvm_rip_read(&svm->vcpu) -
5621 int3_injected);
5622 break;
5623 }
5624 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
5625 u32 err = svm->vmcb->control.exit_int_info_err;
5626 kvm_requeue_exception_e(&svm->vcpu, vector, err);
5627
5628 } else
5629 kvm_requeue_exception(&svm->vcpu, vector);
5630 break;
5631 case SVM_EXITINTINFO_TYPE_INTR:
5632 kvm_queue_interrupt(&svm->vcpu, vector, false);
5633 break;
5634 default:
5635 break;
5636 }
5637 }
5638
5639 static void svm_cancel_injection(struct kvm_vcpu *vcpu)
5640 {
5641 struct vcpu_svm *svm = to_svm(vcpu);
5642 struct vmcb_control_area *control = &svm->vmcb->control;
5643
5644 control->exit_int_info = control->event_inj;
5645 control->exit_int_info_err = control->event_inj_err;
5646 control->event_inj = 0;
5647 svm_complete_interrupts(svm);
5648 }
5649
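/*
 * The main run loop body: copy RAX/RSP/RIP into the VMCB, set up the
 * guest SPEC_CTRL, execute VMLOAD/VMRUN/VMSAVE with GIF cleared, then
 * restore host state, harvest the guest's SPEC_CTRL if it is passed
 * through, and complete event/interrupt bookkeeping for the exit.
 */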
5650 static void svm_vcpu_run(struct kvm_vcpu *vcpu)
5651 {
5652 struct vcpu_svm *svm = to_svm(vcpu);
5653
5654 svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
5655 svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
5656 svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
5657
5658
5659
5660
5661
5662 if (unlikely(svm->nested.exit_required))
5663 return;
5664
5665
5666
5667
5668
5669
5670
5671 if (svm->nmi_singlestep && svm->vmcb->control.event_inj) {
5672
5673
5674
5675
5676
5677 disable_nmi_singlestep(svm);
5678 smp_send_reschedule(vcpu->cpu);
5679 }
5680
5681 pre_svm_run(svm);
5682
5683 sync_lapic_to_cr8(vcpu);
5684
5685 svm->vmcb->save.cr2 = vcpu->arch.cr2;
5686
5687 clgi();
5688 kvm_load_guest_xcr0(vcpu);
5689
5690 if (lapic_in_kernel(vcpu) &&
5691 vcpu->arch.apic->lapic_timer.timer_advance_ns)
5692 kvm_wait_lapic_expire(vcpu);
5693
5694
5695
5696
5697
5698
5699
5700 x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
5701
5702 local_irq_enable();
5703
5704 asm volatile (
5705 "push %%" _ASM_BP "; \n\t"
5706 "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
5707 "mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t"
5708 "mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t"
5709 "mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t"
5710 "mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t"
5711 "mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t"
5712 #ifdef CONFIG_X86_64
5713 "mov %c[r8](%[svm]), %%r8 \n\t"
5714 "mov %c[r9](%[svm]), %%r9 \n\t"
5715 "mov %c[r10](%[svm]), %%r10 \n\t"
5716 "mov %c[r11](%[svm]), %%r11 \n\t"
5717 "mov %c[r12](%[svm]), %%r12 \n\t"
5718 "mov %c[r13](%[svm]), %%r13 \n\t"
5719 "mov %c[r14](%[svm]), %%r14 \n\t"
5720 "mov %c[r15](%[svm]), %%r15 \n\t"
5721 #endif
5722
5723
5724 "push %%" _ASM_AX " \n\t"
5725 "mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t"
5726 __ex("vmload %%" _ASM_AX) "\n\t"
5727 __ex("vmrun %%" _ASM_AX) "\n\t"
5728 __ex("vmsave %%" _ASM_AX) "\n\t"
5729 "pop %%" _ASM_AX " \n\t"
5730
5731
5732 "mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t"
5733 "mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t"
5734 "mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t"
5735 "mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t"
5736 "mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t"
5737 "mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t"
5738 #ifdef CONFIG_X86_64
5739 "mov %%r8, %c[r8](%[svm]) \n\t"
5740 "mov %%r9, %c[r9](%[svm]) \n\t"
5741 "mov %%r10, %c[r10](%[svm]) \n\t"
5742 "mov %%r11, %c[r11](%[svm]) \n\t"
5743 "mov %%r12, %c[r12](%[svm]) \n\t"
5744 "mov %%r13, %c[r13](%[svm]) \n\t"
5745 "mov %%r14, %c[r14](%[svm]) \n\t"
5746 "mov %%r15, %c[r15](%[svm]) \n\t"
5747
5748
5749
5750
5751 "xor %%r8d, %%r8d \n\t"
5752 "xor %%r9d, %%r9d \n\t"
5753 "xor %%r10d, %%r10d \n\t"
5754 "xor %%r11d, %%r11d \n\t"
5755 "xor %%r12d, %%r12d \n\t"
5756 "xor %%r13d, %%r13d \n\t"
5757 "xor %%r14d, %%r14d \n\t"
5758 "xor %%r15d, %%r15d \n\t"
5759 #endif
5760 "xor %%ebx, %%ebx \n\t"
5761 "xor %%ecx, %%ecx \n\t"
5762 "xor %%edx, %%edx \n\t"
5763 "xor %%esi, %%esi \n\t"
5764 "xor %%edi, %%edi \n\t"
5765 "pop %%" _ASM_BP
5766 :
5767 : [svm]"a"(svm),
5768 [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
5769 [rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])),
5770 [rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])),
5771 [rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])),
5772 [rsi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])),
5773 [rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])),
5774 [rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP]))
5775 #ifdef CONFIG_X86_64
5776 , [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])),
5777 [r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])),
5778 [r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])),
5779 [r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])),
5780 [r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])),
5781 [r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])),
5782 [r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])),
5783 [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
5784 #endif
5785 : "cc", "memory"
5786 #ifdef CONFIG_X86_64
5787 , "rbx", "rcx", "rdx", "rsi", "rdi"
5788 , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
5789 #else
5790 , "ebx", "ecx", "edx", "esi", "edi"
5791 #endif
5792 );
5793
5794
5795 vmexit_fill_RSB();
5796
5797 #ifdef CONFIG_X86_64
5798 wrmsrl(MSR_GS_BASE, svm->host.gs_base);
5799 #else
5800 loadsegment(fs, svm->host.fs);
5801 #ifndef CONFIG_X86_32_LAZY_GS
5802 loadsegment(gs, svm->host.gs);
5803 #endif
5804 #endif
5805
5806
5807
5808
5809
5810
5811
5812
5813
5814
5815
5816
5817
5818
5819
5820
5821 if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
5822 svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
5823
5824 reload_tss(vcpu);
5825
5826 local_irq_disable();
5827
5828 x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
5829
5830 vcpu->arch.cr2 = svm->vmcb->save.cr2;
5831 vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
5832 vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
5833 vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
5834
5835 if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
5836 kvm_before_interrupt(&svm->vcpu);
5837
5838 kvm_put_guest_xcr0(vcpu);
5839 stgi();
5840
5841
5842
5843 if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
5844 kvm_after_interrupt(&svm->vcpu);
5845
5846 sync_cr8_to_lapic(vcpu);
5847
5848 svm->next_rip = 0;
5849
5850 svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
5851
5852
5853 if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
5854 svm->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
5855
5856 if (npt_enabled) {
5857 vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
5858 vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
5859 }
5860
5861
5862
5863
5864
5865 if (unlikely(svm->vmcb->control.exit_code ==
5866 SVM_EXIT_EXCP_BASE + MC_VECTOR))
5867 svm_handle_mce(svm);
5868
5869 mark_all_clean(svm->vmcb);
5870 }
5871 STACK_FRAME_NON_STANDARD(svm_vcpu_run);
5872
5873 static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
5874 {
5875 struct vcpu_svm *svm = to_svm(vcpu);
5876
5877 svm->vmcb->save.cr3 = __sme_set(root);
5878 mark_dirty(svm->vmcb, VMCB_CR);
5879 }
5880
5881 static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
5882 {
5883 struct vcpu_svm *svm = to_svm(vcpu);
5884
5885 svm->vmcb->control.nested_cr3 = __sme_set(root);
5886 mark_dirty(svm->vmcb, VMCB_NPT);
5887
5888
5889 svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
5890 mark_dirty(svm->vmcb, VMCB_CR);
5891 }
5892
5893 static int is_disabled(void)
5894 {
5895 u64 vm_cr;
5896
5897 rdmsrl(MSR_VM_CR, vm_cr);
5898 if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE))
5899 return 1;
5900
5901 return 0;
5902 }
5903
5904 static void
5905 svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
5906 {
5907
5908
5909
5910 hypercall[0] = 0x0f;
5911 hypercall[1] = 0x01;
5912 hypercall[2] = 0xd9;
5913 }
5914
5915 static int __init svm_check_processor_compat(void)
5916 {
5917 return 0;
5918 }
5919
5920 static bool svm_cpu_has_accelerated_tpr(void)
5921 {
5922 return false;
5923 }
5924
5925 static bool svm_has_emulated_msr(int index)
5926 {
5927 switch (index) {
5928 case MSR_IA32_MCG_EXT_CTL:
5929 case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
5930 return false;
5931 default:
5932 break;
5933 }
5934
5935 return true;
5936 }
5937
5938 static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
5939 {
5940 return 0;
5941 }
5942
5943 static void svm_cpuid_update(struct kvm_vcpu *vcpu)
5944 {
5945 struct vcpu_svm *svm = to_svm(vcpu);
5946
5947
5948 svm->nrips_enabled = !!guest_cpuid_has(&svm->vcpu, X86_FEATURE_NRIPS);
5949
5950 if (!kvm_vcpu_apicv_active(vcpu))
5951 return;
5952
5953 guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC);
5954 }
5955
5956 #define F(x) bit(X86_FEATURE_##x)
5957
5958 static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
5959 {
5960 switch (func) {
5961 case 0x1:
5962 if (avic)
5963 entry->ecx &= ~bit(X86_FEATURE_X2APIC);
5964 break;
5965 case 0x80000001:
5966 if (nested)
5967 entry->ecx |= (1 << 2);
5968 break;
5969 case 0x80000008:
5970 if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
5971 boot_cpu_has(X86_FEATURE_AMD_SSBD))
5972 entry->ebx |= F(VIRT_SSBD);
5973 break;
5974 case 0x8000000A:
5975 entry->eax = 1;
5976 entry->ebx = 8;
5977
5978 entry->ecx = 0;
5979 entry->edx = 0;
5980
5981
5982
5983 if (boot_cpu_has(X86_FEATURE_NRIPS))
5984 entry->edx |= F(NRIPS);
5985
5986
5987 if (npt_enabled)
5988 entry->edx |= F(NPT);
5989
5990 break;
5991 case 0x8000001F:
5992
5993 if (boot_cpu_has(X86_FEATURE_SEV))
5994 cpuid(0x8000001f, &entry->eax, &entry->ebx,
5995 &entry->ecx, &entry->edx);
5996
5997 }
5998 }
5999
6000 static int svm_get_lpage_level(void)
6001 {
6002 return PT_PDPE_LEVEL;
6003 }
6004
6005 static bool svm_rdtscp_supported(void)
6006 {
6007 return boot_cpu_has(X86_FEATURE_RDTSCP);
6008 }
6009
6010 static bool svm_invpcid_supported(void)
6011 {
6012 return false;
6013 }
6014
6015 static bool svm_mpx_supported(void)
6016 {
6017 return false;
6018 }
6019
6020 static bool svm_xsaves_supported(void)
6021 {
6022 return false;
6023 }
6024
6025 static bool svm_umip_emulated(void)
6026 {
6027 return false;
6028 }
6029
6030 static bool svm_pt_supported(void)
6031 {
6032 return false;
6033 }
6034
6035 static bool svm_has_wbinvd_exit(void)
6036 {
6037 return true;
6038 }
6039
6040 static bool svm_pku_supported(void)
6041 {
6042 return false;
6043 }
6044
6045 #define PRE_EX(exit) { .exit_code = (exit), \
6046 .stage = X86_ICPT_PRE_EXCEPT, }
6047 #define POST_EX(exit) { .exit_code = (exit), \
6048 .stage = X86_ICPT_POST_EXCEPT, }
6049 #define POST_MEM(exit) { .exit_code = (exit), \
6050 .stage = X86_ICPT_POST_MEMACCESS, }
6051
6052 static const struct __x86_intercept {
6053 u32 exit_code;
6054 enum x86_intercept_stage stage;
6055 } x86_intercept_map[] = {
6056 [x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0),
6057 [x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0),
6058 [x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0),
6059 [x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0),
6060 [x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0),
6061 [x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0),
6062 [x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0),
6063 [x86_intercept_sldt] = POST_EX(SVM_EXIT_LDTR_READ),
6064 [x86_intercept_str] = POST_EX(SVM_EXIT_TR_READ),
6065 [x86_intercept_lldt] = POST_EX(SVM_EXIT_LDTR_WRITE),
6066 [x86_intercept_ltr] = POST_EX(SVM_EXIT_TR_WRITE),
6067 [x86_intercept_sgdt] = POST_EX(SVM_EXIT_GDTR_READ),
6068 [x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ),
6069 [x86_intercept_lgdt] = POST_EX(SVM_EXIT_GDTR_WRITE),
6070 [x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE),
6071 [x86_intercept_vmrun] = POST_EX(SVM_EXIT_VMRUN),
6072 [x86_intercept_vmmcall] = POST_EX(SVM_EXIT_VMMCALL),
6073 [x86_intercept_vmload] = POST_EX(SVM_EXIT_VMLOAD),
6074 [x86_intercept_vmsave] = POST_EX(SVM_EXIT_VMSAVE),
6075 [x86_intercept_stgi] = POST_EX(SVM_EXIT_STGI),
6076 [x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI),
6077 [x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT),
6078 [x86_intercept_invlpga] = POST_EX(SVM_EXIT_INVLPGA),
6079 [x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP),
6080 [x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR),
6081 [x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT),
6082 [x86_intercept_invlpg] = POST_EX(SVM_EXIT_INVLPG),
6083 [x86_intercept_invd] = POST_EX(SVM_EXIT_INVD),
6084 [x86_intercept_wbinvd] = POST_EX(SVM_EXIT_WBINVD),
6085 [x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR),
6086 [x86_intercept_rdtsc] = POST_EX(SVM_EXIT_RDTSC),
6087 [x86_intercept_rdmsr] = POST_EX(SVM_EXIT_MSR),
6088 [x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC),
6089 [x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID),
6090 [x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM),
6091 [x86_intercept_pause] = PRE_EX(SVM_EXIT_PAUSE),
6092 [x86_intercept_pushf] = PRE_EX(SVM_EXIT_PUSHF),
6093 [x86_intercept_popf] = PRE_EX(SVM_EXIT_POPF),
6094 [x86_intercept_intn] = PRE_EX(SVM_EXIT_SWINT),
6095 [x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET),
6096 [x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP),
6097 [x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT),
6098 [x86_intercept_in] = POST_EX(SVM_EXIT_IOIO),
6099 [x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO),
6100 [x86_intercept_out] = POST_EX(SVM_EXIT_IOIO),
6101 [x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO),
6102 [x86_intercept_xsetbv] = PRE_EX(SVM_EXIT_XSETBV),
6103 };
6104
6105 #undef PRE_EX
6106 #undef POST_EX
6107 #undef POST_MEM
6108
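/*
 * Emulator callback: decide whether an instruction being emulated on
 * behalf of L2 is intercepted by L1.  The exit code and exit_info
 * fields are synthesized the way hardware would produce them and then
 * run through nested_svm_exit_handled().
 */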
6109 static int svm_check_intercept(struct kvm_vcpu *vcpu,
6110 struct x86_instruction_info *info,
6111 enum x86_intercept_stage stage)
6112 {
6113 struct vcpu_svm *svm = to_svm(vcpu);
6114 int vmexit, ret = X86EMUL_CONTINUE;
6115 struct __x86_intercept icpt_info;
6116 struct vmcb *vmcb = svm->vmcb;
6117
6118 if (info->intercept >= ARRAY_SIZE(x86_intercept_map))
6119 goto out;
6120
6121 icpt_info = x86_intercept_map[info->intercept];
6122
6123 if (stage != icpt_info.stage)
6124 goto out;
6125
6126 switch (icpt_info.exit_code) {
6127 case SVM_EXIT_READ_CR0:
6128 if (info->intercept == x86_intercept_cr_read)
6129 icpt_info.exit_code += info->modrm_reg;
6130 break;
6131 case SVM_EXIT_WRITE_CR0: {
6132 unsigned long cr0, val;
6133 u64 intercept;
6134
6135 if (info->intercept == x86_intercept_cr_write)
6136 icpt_info.exit_code += info->modrm_reg;
6137
6138 if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 ||
6139 info->intercept == x86_intercept_clts)
6140 break;
6141
6142 intercept = svm->nested.intercept;
6143
6144 if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
6145 break;
6146
6147 cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
6148 val = info->src_val & ~SVM_CR0_SELECTIVE_MASK;
6149
6150 if (info->intercept == x86_intercept_lmsw) {
6151 cr0 &= 0xfUL;
6152 val &= 0xfUL;
6153
6154 if (cr0 & X86_CR0_PE)
6155 val |= X86_CR0_PE;
6156 }
6157
6158 if (cr0 ^ val)
6159 icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;
6160
6161 break;
6162 }
6163 case SVM_EXIT_READ_DR0:
6164 case SVM_EXIT_WRITE_DR0:
6165 icpt_info.exit_code += info->modrm_reg;
6166 break;
6167 case SVM_EXIT_MSR:
6168 if (info->intercept == x86_intercept_wrmsr)
6169 vmcb->control.exit_info_1 = 1;
6170 else
6171 vmcb->control.exit_info_1 = 0;
6172 break;
6173 case SVM_EXIT_PAUSE:
6174
6175
6176
6177
6178 if (info->rep_prefix != REPE_PREFIX)
6179 goto out;
6180 break;
6181 case SVM_EXIT_IOIO: {
6182 u64 exit_info;
6183 u32 bytes;
6184
6185 if (info->intercept == x86_intercept_in ||
6186 info->intercept == x86_intercept_ins) {
6187 exit_info = ((info->src_val & 0xffff) << 16) |
6188 SVM_IOIO_TYPE_MASK;
6189 bytes = info->dst_bytes;
6190 } else {
6191 exit_info = (info->dst_val & 0xffff) << 16;
6192 bytes = info->src_bytes;
6193 }
6194
6195 if (info->intercept == x86_intercept_outs ||
6196 info->intercept == x86_intercept_ins)
6197 exit_info |= SVM_IOIO_STR_MASK;
6198
6199 if (info->rep_prefix)
6200 exit_info |= SVM_IOIO_REP_MASK;
6201
6202 bytes = min(bytes, 4u);
6203
6204 exit_info |= bytes << SVM_IOIO_SIZE_SHIFT;
6205
6206 exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1);
6207
6208 vmcb->control.exit_info_1 = exit_info;
6209 vmcb->control.exit_info_2 = info->next_rip;
6210
6211 break;
6212 }
6213 default:
6214 break;
6215 }
6216
6217
6218 if (static_cpu_has(X86_FEATURE_NRIPS))
6219 vmcb->control.next_rip = info->next_rip;
6220 vmcb->control.exit_code = icpt_info.exit_code;
6221 vmexit = nested_svm_exit_handled(svm);
6222
6223 ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
6224 : X86EMUL_CONTINUE;
6225
6226 out:
6227 return ret;
6228 }
6229
6230 static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
6231 {
6232
6233 }
6234
6235 static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
6236 {
6237 if (pause_filter_thresh)
6238 shrink_ple_window(vcpu);
6239 }
6240
6241 static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
6242 {
6243 if (avic_handle_apic_id_update(vcpu) != 0)
6244 return;
6245 avic_handle_dfr_update(vcpu);
6246 avic_handle_ldr_update(vcpu);
6247 }
6248
6249 static void svm_setup_mce(struct kvm_vcpu *vcpu)
6250 {
6251
6252 vcpu->arch.mcg_cap &= 0x1ff;
6253 }
6254
6255 static int svm_smi_allowed(struct kvm_vcpu *vcpu)
6256 {
6257 struct vcpu_svm *svm = to_svm(vcpu);
6258
6259
6260 if (!gif_set(svm))
6261 return 0;
6262
6263 if (is_guest_mode(&svm->vcpu) &&
6264 svm->nested.intercept & (1ULL << INTERCEPT_SMI)) {
6265
6266 svm->vmcb->control.exit_code = SVM_EXIT_SMI;
6267 svm->nested.exit_required = true;
6268 return 0;
6269 }
6270
6271 return 1;
6272 }
6273
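/*
 * SMM transitions: on entry, stash the "in guest mode" flag and the
 * L1 VMCB address in SMRAM (offsets 0x7ed8/0x7ee0) and force a nested
 * #VMEXIT; svm_pre_leave_smm() below re-enters the nested guest from
 * that saved state on RSM.
 */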
6274 static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
6275 {
6276 struct vcpu_svm *svm = to_svm(vcpu);
6277 int ret;
6278
6279 if (is_guest_mode(vcpu)) {
6280
6281 put_smstate(u64, smstate, 0x7ed8, 1);
6282
6283 put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb);
6284
6285 svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
6286 svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
6287 svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
6288
6289 ret = nested_svm_vmexit(svm);
6290 if (ret)
6291 return ret;
6292 }
6293 return 0;
6294 }
6295
6296 static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
6297 {
6298 struct vcpu_svm *svm = to_svm(vcpu);
6299 struct vmcb *nested_vmcb;
6300 struct kvm_host_map map;
6301 u64 guest;
6302 u64 vmcb;
6303
6304 guest = GET_SMSTATE(u64, smstate, 0x7ed8);
6305 vmcb = GET_SMSTATE(u64, smstate, 0x7ee0);
6306
6307 if (guest) {
6308 if (kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb), &map) == -EINVAL)
6309 return 1;
6310 nested_vmcb = map.hva;
6311 enter_svm_guest_mode(svm, vmcb, nested_vmcb, &map);
6312 }
6313 return 0;
6314 }
6315
6316 static int enable_smi_window(struct kvm_vcpu *vcpu)
6317 {
6318 struct vcpu_svm *svm = to_svm(vcpu);
6319
6320 if (!gif_set(svm)) {
6321 if (vgif_enabled(svm))
6322 set_intercept(svm, INTERCEPT_STGI);
6323
6324 return 1;
6325 }
6326 return 0;
6327 }
6328
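/*
 * Allocate an ASID from the SEV range (min_sev_asid..max_sev_asid).
 * Returns the 1-based ASID, or -EBUSY when all SEV ASIDs are in use.
 */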
6329 static int sev_asid_new(void)
6330 {
6331 int pos;
6332
6333
6334
6335
6336 pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1);
6337 if (pos >= max_sev_asid)
6338 return -EBUSY;
6339
6340 set_bit(pos, sev_asid_bitmap);
6341 return pos + 1;
6342 }
6343
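/*
 * KVM_SEV_INIT: reserve an ASID, initialize the SEV platform and mark the
 * VM as SEV-active. The ASID is released again if platform init fails.
 */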
6344 static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
6345 {
6346 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6347 int asid, ret;
6348
6349 ret = -EBUSY;
6350 if (unlikely(sev->active))
6351 return ret;
6352
6353 asid = sev_asid_new();
6354 if (asid < 0)
6355 return ret;
6356
6357 ret = sev_platform_init(&argp->error);
6358 if (ret)
6359 goto e_free;
6360
6361 sev->active = true;
6362 sev->asid = asid;
6363 INIT_LIST_HEAD(&sev->regions_list);
6364
6365 return 0;
6366
6367 e_free:
6368 __sev_asid_free(asid);
6369 return ret;
6370 }
6371
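/*
 * Bind the firmware handle returned by LAUNCH_START to this VM's ASID:
 * wbinvd_on_all_cpus() and a firmware DF_FLUSH are issued before the ASID
 * is activated via sev_guest_activate().
 */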
6372 static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
6373 {
6374 struct sev_data_activate *data;
6375 int asid = sev_get_asid(kvm);
6376 int ret;
6377
6378 wbinvd_on_all_cpus();
6379
6380 ret = sev_guest_df_flush(error);
6381 if (ret)
6382 return ret;
6383
6384 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
6385 if (!data)
6386 return -ENOMEM;
6387
6388
6389 data->handle = handle;
6390 data->asid = asid;
6391 ret = sev_guest_activate(data, error);
6392 kfree(data);
6393
6394 return ret;
6395 }
6396
6397 static int __sev_issue_cmd(int fd, int id, void *data, int *error)
6398 {
6399 struct fd f;
6400 int ret;
6401
6402 f = fdget(fd);
6403 if (!f.file)
6404 return -EBADF;
6405
6406 ret = sev_issue_cmd_external_user(f.file, id, data, error);
6407
6408 fdput(f);
6409 return ret;
6410 }
6411
6412 static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
6413 {
6414 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6415
6416 return __sev_issue_cmd(sev->fd, id, data, error);
6417 }
6418
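/*
 * KVM_SEV_LAUNCH_START: create the SEV launch context. The optional DH
 * certificate and session blobs are copied in from userspace, the firmware
 * LAUNCH_START command is issued, and the returned handle is bound to this
 * VM's ASID and reported back to userspace.
 */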
6419 static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
6420 {
6421 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6422 struct sev_data_launch_start *start;
6423 struct kvm_sev_launch_start params;
6424 void *dh_blob, *session_blob;
6425 int *error = &argp->error;
6426 int ret;
6427
6428 if (!sev_guest(kvm))
6429 return -ENOTTY;
6430
6431	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
6432 return -EFAULT;
6433
6434 start = kzalloc(sizeof(*start), GFP_KERNEL_ACCOUNT);
6435 if (!start)
6436 return -ENOMEM;
6437
6438 dh_blob = NULL;
6439 if (params.dh_uaddr) {
6440 dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len);
6441 if (IS_ERR(dh_blob)) {
6442 ret = PTR_ERR(dh_blob);
6443 goto e_free;
6444 }
6445
6446 start->dh_cert_address = __sme_set(__pa(dh_blob));
6447 start->dh_cert_len = params.dh_len;
6448 }
6449
6450 session_blob = NULL;
6451 if (params.session_uaddr) {
6452 session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len);
6453 if (IS_ERR(session_blob)) {
6454 ret = PTR_ERR(session_blob);
6455 goto e_free_dh;
6456 }
6457
6458 start->session_address = __sme_set(__pa(session_blob));
6459 start->session_len = params.session_len;
6460 }
6461
6462 start->handle = params.handle;
6463 start->policy = params.policy;
6464
6465
6466 ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, start, error);
6467 if (ret)
6468 goto e_free_session;
6469
6470
6471 ret = sev_bind_asid(kvm, start->handle, error);
6472 if (ret)
6473 goto e_free_session;
6474
6475
6476 params.handle = start->handle;
6477	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) {
6478 sev_unbind_asid(kvm, start->handle);
6479 ret = -EFAULT;
6480 goto e_free_session;
6481 }
6482
6483 sev->handle = start->handle;
6484 sev->fd = argp->sev_fd;
6485
6486 e_free_session:
6487 kfree(session_blob);
6488 e_free_dh:
6489 kfree(dh_blob);
6490 e_free:
6491 kfree(start);
6492 return ret;
6493 }
6494
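/*
 * Return the number of physically contiguous pages in inpages[] starting
 * at index idx.
 */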
6495 static unsigned long get_num_contig_pages(unsigned long idx,
6496 struct page **inpages, unsigned long npages)
6497 {
6498 unsigned long paddr, next_paddr;
6499 unsigned long i = idx + 1, pages = 1;
6500
6501
6502 paddr = __sme_page_pa(inpages[idx]);
6503 while (i < npages) {
6504 next_paddr = __sme_page_pa(inpages[i++]);
6505 if ((paddr + PAGE_SIZE) == next_paddr) {
6506 pages++;
6507 paddr = next_paddr;
6508 continue;
6509 }
6510 break;
6511 }
6512
6513 return pages;
6514 }
6515
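/*
 * KVM_SEV_LAUNCH_UPDATE_DATA: encrypt guest memory in place. The user
 * range is pinned, its cache lines are flushed, and the firmware command
 * is issued once per physically contiguous run of pages.
 */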
6516 static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
6517 {
6518 unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
6519 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6520 struct kvm_sev_launch_update_data params;
6521 struct sev_data_launch_update_data *data;
6522 struct page **inpages;
6523 int ret;
6524
6525 if (!sev_guest(kvm))
6526 return -ENOTTY;
6527
6528	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
6529 return -EFAULT;
6530
6531 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
6532 if (!data)
6533 return -ENOMEM;
6534
6535 vaddr = params.uaddr;
6536 size = params.len;
6537 vaddr_end = vaddr + size;
6538
6539
6540 inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
6541 if (!inpages) {
6542 ret = -ENOMEM;
6543 goto e_free;
6544 }
6545
6546
6547
6548
6549
6550
6551
6552 sev_clflush_pages(inpages, npages);
6553
6554 for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
6555 int offset, len;
6556
6557
6558
6559
6560
6561 offset = vaddr & (PAGE_SIZE - 1);
6562
6563
6564 pages = get_num_contig_pages(i, inpages, npages);
6565
6566 len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size);
6567
6568 data->handle = sev->handle;
6569 data->len = len;
6570 data->address = __sme_page_pa(inpages[i]) + offset;
6571 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, data, &argp->error);
6572 if (ret)
6573 goto e_unpin;
6574
6575 size -= len;
6576 next_vaddr = vaddr + len;
6577 }
6578
6579 e_unpin:
6580
6581 for (i = 0; i < npages; i++) {
6582 set_page_dirty_lock(inpages[i]);
6583 mark_page_accessed(inpages[i]);
6584 }
6585
6586 sev_unpin_memory(kvm, inpages, npages);
6587 e_free:
6588 kfree(data);
6589 return ret;
6590 }
6591
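/*
 * KVM_SEV_LAUNCH_MEASURE: return the launch measurement. When params.len
 * is zero only the required blob size is reported back; otherwise the
 * measurement is read into a kernel buffer and copied out to params.uaddr.
 */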
6592 static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
6593 {
6594 void __user *measure = (void __user *)(uintptr_t)argp->data;
6595 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6596 struct sev_data_launch_measure *data;
6597 struct kvm_sev_launch_measure params;
6598 void __user *p = NULL;
6599 void *blob = NULL;
6600 int ret;
6601
6602 if (!sev_guest(kvm))
6603 return -ENOTTY;
6604
6605	if (copy_from_user(&params, measure, sizeof(params)))
6606 return -EFAULT;
6607
6608 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
6609 if (!data)
6610 return -ENOMEM;
6611
6612
6613 if (!params.len)
6614 goto cmd;
6615
6616 p = (void __user *)(uintptr_t)params.uaddr;
6617 if (p) {
6618 if (params.len > SEV_FW_BLOB_MAX_SIZE) {
6619 ret = -EINVAL;
6620 goto e_free;
6621 }
6622
6623 ret = -ENOMEM;
6624 blob = kmalloc(params.len, GFP_KERNEL);
6625 if (!blob)
6626 goto e_free;
6627
6628 data->address = __psp_pa(blob);
6629 data->len = params.len;
6630 }
6631
6632 cmd:
6633 data->handle = sev->handle;
6634 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, data, &argp->error);
6635
6636
6637
6638
6639 if (!params.len)
6640 goto done;
6641
6642 if (ret)
6643 goto e_free_blob;
6644
6645 if (blob) {
6646 if (copy_to_user(p, blob, params.len))
6647 ret = -EFAULT;
6648 }
6649
6650 done:
6651 params.len = data->len;
6652	if (copy_to_user(measure, &params, sizeof(params)))
6653 ret = -EFAULT;
6654 e_free_blob:
6655 kfree(blob);
6656 e_free:
6657 kfree(data);
6658 return ret;
6659 }
6660
6661 static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
6662 {
6663 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6664 struct sev_data_launch_finish *data;
6665 int ret;
6666
6667 if (!sev_guest(kvm))
6668 return -ENOTTY;
6669
6670 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
6671 if (!data)
6672 return -ENOMEM;
6673
6674 data->handle = sev->handle;
6675 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, data, &argp->error);
6676
6677 kfree(data);
6678 return ret;
6679 }
6680
6681 static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
6682 {
6683 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6684 struct kvm_sev_guest_status params;
6685 struct sev_data_guest_status *data;
6686 int ret;
6687
6688 if (!sev_guest(kvm))
6689 return -ENOTTY;
6690
6691 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
6692 if (!data)
6693 return -ENOMEM;
6694
6695 data->handle = sev->handle;
6696 ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, data, &argp->error);
6697 if (ret)
6698 goto e_free;
6699
6700 params.policy = data->policy;
6701 params.state = data->state;
6702 params.handle = data->handle;
6703
6704	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
6705 ret = -EFAULT;
6706 e_free:
6707 kfree(data);
6708 return ret;
6709 }
6710
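/*
 * Issue a single SEV_CMD_DBG_ENCRYPT or SEV_CMD_DBG_DECRYPT command for a
 * physically contiguous source/destination pair.
 */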
6711 static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
6712 unsigned long dst, int size,
6713 int *error, bool enc)
6714 {
6715 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6716 struct sev_data_dbg *data;
6717 int ret;
6718
6719 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
6720 if (!data)
6721 return -ENOMEM;
6722
6723 data->handle = sev->handle;
6724 data->dst_addr = dst;
6725 data->src_addr = src;
6726 data->len = size;
6727
6728 ret = sev_issue_cmd(kvm,
6729 enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT,
6730 data, error);
6731 kfree(data);
6732 return ret;
6733 }
6734
6735 static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
6736 unsigned long dst_paddr, int sz, int *err)
6737 {
6738 int offset;
6739
6740
6741
6742
6743
6744 src_paddr = round_down(src_paddr, 16);
6745 offset = src_paddr & 15;
6746 sz = round_up(sz + offset, 16);
6747
6748 return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
6749 }
6750
6751 static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
6752 unsigned long __user dst_uaddr,
6753 unsigned long dst_paddr,
6754 int size, int *err)
6755 {
6756 struct page *tpage = NULL;
6757 int ret, offset;
6758
6759
6760 if (!IS_ALIGNED(dst_paddr, 16) ||
6761 !IS_ALIGNED(paddr, 16) ||
6762 !IS_ALIGNED(size, 16)) {
6763 tpage = (void *)alloc_page(GFP_KERNEL);
6764 if (!tpage)
6765 return -ENOMEM;
6766
6767 dst_paddr = __sme_page_pa(tpage);
6768 }
6769
6770 ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err);
6771 if (ret)
6772 goto e_free;
6773
6774 if (tpage) {
6775 offset = paddr & 15;
6776 if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,
6777 page_address(tpage) + offset, size))
6778 ret = -EFAULT;
6779 }
6780
6781 e_free:
6782 if (tpage)
6783 __free_page(tpage);
6784
6785 return ret;
6786 }
6787
6788 static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
6789 unsigned long __user vaddr,
6790 unsigned long dst_paddr,
6791 unsigned long __user dst_vaddr,
6792 int size, int *error)
6793 {
6794 struct page *src_tpage = NULL;
6795 struct page *dst_tpage = NULL;
6796 int ret, len = size;
6797
6798
6799 if (!IS_ALIGNED(vaddr, 16)) {
6800 src_tpage = alloc_page(GFP_KERNEL);
6801 if (!src_tpage)
6802 return -ENOMEM;
6803
6804 if (copy_from_user(page_address(src_tpage),
6805 (void __user *)(uintptr_t)vaddr, size)) {
6806 __free_page(src_tpage);
6807 return -EFAULT;
6808 }
6809
6810 paddr = __sme_page_pa(src_tpage);
6811 }
6812
6813
6814
6815
6816
6817
6818
6819 if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
6820 int dst_offset;
6821
6822 dst_tpage = alloc_page(GFP_KERNEL);
6823 if (!dst_tpage) {
6824 ret = -ENOMEM;
6825 goto e_free;
6826 }
6827
6828 ret = __sev_dbg_decrypt(kvm, dst_paddr,
6829 __sme_page_pa(dst_tpage), size, error);
6830 if (ret)
6831 goto e_free;
6832
6833
6834
6835
6836
6837 dst_offset = dst_paddr & 15;
6838
6839 if (src_tpage)
6840 memcpy(page_address(dst_tpage) + dst_offset,
6841 page_address(src_tpage), size);
6842 else {
6843 if (copy_from_user(page_address(dst_tpage) + dst_offset,
6844 (void __user *)(uintptr_t)vaddr, size)) {
6845 ret = -EFAULT;
6846 goto e_free;
6847 }
6848 }
6849
6850 paddr = __sme_page_pa(dst_tpage);
6851 dst_paddr = round_down(dst_paddr, 16);
6852 len = round_up(size, 16);
6853 }
6854
6855 ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true);
6856
6857 e_free:
6858 if (src_tpage)
6859 __free_page(src_tpage);
6860 if (dst_tpage)
6861 __free_page(dst_tpage);
6862 return ret;
6863 }
6864
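/*
 * KVM_SEV_DBG_DECRYPT / KVM_SEV_DBG_ENCRYPT: walk the source range one
 * page at a time, pin the source and destination pages, flush caches and
 * hand each chunk to the debug decrypt/encrypt helpers above.
 */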
6865 static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
6866 {
6867 unsigned long vaddr, vaddr_end, next_vaddr;
6868 unsigned long dst_vaddr;
6869 struct page **src_p, **dst_p;
6870 struct kvm_sev_dbg debug;
6871 unsigned long n;
6872 unsigned int size;
6873 int ret;
6874
6875 if (!sev_guest(kvm))
6876 return -ENOTTY;
6877
6878 if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
6879 return -EFAULT;
6880
6881 if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr)
6882 return -EINVAL;
6883 if (!debug.dst_uaddr)
6884 return -EINVAL;
6885
6886 vaddr = debug.src_uaddr;
6887 size = debug.len;
6888 vaddr_end = vaddr + size;
6889 dst_vaddr = debug.dst_uaddr;
6890
6891 for (; vaddr < vaddr_end; vaddr = next_vaddr) {
6892 int len, s_off, d_off;
6893
6894
6895 src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
6896 if (!src_p)
6897 return -EFAULT;
6898
6899 dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
6900 if (!dst_p) {
6901 sev_unpin_memory(kvm, src_p, n);
6902 return -EFAULT;
6903 }
6904
6905
6906
6907
6908
6909
6910
6911 sev_clflush_pages(src_p, 1);
6912 sev_clflush_pages(dst_p, 1);
6913
6914
6915
6916
6917
6918 s_off = vaddr & ~PAGE_MASK;
6919 d_off = dst_vaddr & ~PAGE_MASK;
6920 len = min_t(size_t, (PAGE_SIZE - s_off), size);
6921
6922 if (dec)
6923 ret = __sev_dbg_decrypt_user(kvm,
6924 __sme_page_pa(src_p[0]) + s_off,
6925 dst_vaddr,
6926 __sme_page_pa(dst_p[0]) + d_off,
6927 len, &argp->error);
6928 else
6929 ret = __sev_dbg_encrypt_user(kvm,
6930 __sme_page_pa(src_p[0]) + s_off,
6931 vaddr,
6932 __sme_page_pa(dst_p[0]) + d_off,
6933 dst_vaddr,
6934 len, &argp->error);
6935
6936 sev_unpin_memory(kvm, src_p, n);
6937 sev_unpin_memory(kvm, dst_p, n);
6938
6939 if (ret)
6940 goto err;
6941
6942 next_vaddr = vaddr + len;
6943 dst_vaddr = dst_vaddr + len;
6944 size -= len;
6945 }
6946 err:
6947 return ret;
6948 }
6949
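/*
 * KVM_SEV_LAUNCH_SECRET: inject a secret into the launched guest. The
 * target guest pages must be physically contiguous; the packet header and
 * transport blob are copied from userspace before LAUNCH_UPDATE_SECRET is
 * issued.
 */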
6950 static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
6951 {
6952 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
6953 struct sev_data_launch_secret *data;
6954 struct kvm_sev_launch_secret params;
6955 struct page **pages;
6956 void *blob, *hdr;
6957 unsigned long n;
6958 int ret, offset;
6959
6960 if (!sev_guest(kvm))
6961 return -ENOTTY;
6962
6963	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
6964 return -EFAULT;
6965
6966 pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
6967 if (!pages)
6968 return -ENOMEM;
6969
6970
6971
6972
6973
6974 if (get_num_contig_pages(0, pages, n) != n) {
6975 ret = -EINVAL;
6976 goto e_unpin_memory;
6977 }
6978
6979 ret = -ENOMEM;
6980 data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
6981 if (!data)
6982 goto e_unpin_memory;
6983
6984 offset = params.guest_uaddr & (PAGE_SIZE - 1);
6985 data->guest_address = __sme_page_pa(pages[0]) + offset;
6986 data->guest_len = params.guest_len;
6987
6988 blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
6989 if (IS_ERR(blob)) {
6990 ret = PTR_ERR(blob);
6991 goto e_free;
6992 }
6993
6994 data->trans_address = __psp_pa(blob);
6995 data->trans_len = params.trans_len;
6996
6997 hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
6998 if (IS_ERR(hdr)) {
6999 ret = PTR_ERR(hdr);
7000 goto e_free_blob;
7001 }
7002 data->hdr_address = __psp_pa(hdr);
7003 data->hdr_len = params.hdr_len;
7004
7005 data->handle = sev->handle;
7006 ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
7007
7008 kfree(hdr);
7009
7010 e_free_blob:
7011 kfree(blob);
7012 e_free:
7013 kfree(data);
7014 e_unpin_memory:
7015 sev_unpin_memory(kvm, pages, n);
7016 return ret;
7017 }
7018
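/*
 * Top-level dispatcher for SEV commands from userspace: copy the command,
 * route it to the matching KVM_SEV_* handler under kvm->lock, then copy
 * the command (including any firmware error code) back to userspace.
 */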
7019 static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
7020 {
7021 struct kvm_sev_cmd sev_cmd;
7022 int r;
7023
7024 if (!svm_sev_enabled())
7025 return -ENOTTY;
7026
7027 if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd)))
7028 return -EFAULT;
7029
7030 mutex_lock(&kvm->lock);
7031
7032 switch (sev_cmd.id) {
7033 case KVM_SEV_INIT:
7034 r = sev_guest_init(kvm, &sev_cmd);
7035 break;
7036 case KVM_SEV_LAUNCH_START:
7037 r = sev_launch_start(kvm, &sev_cmd);
7038 break;
7039 case KVM_SEV_LAUNCH_UPDATE_DATA:
7040 r = sev_launch_update_data(kvm, &sev_cmd);
7041 break;
7042 case KVM_SEV_LAUNCH_MEASURE:
7043 r = sev_launch_measure(kvm, &sev_cmd);
7044 break;
7045 case KVM_SEV_LAUNCH_FINISH:
7046 r = sev_launch_finish(kvm, &sev_cmd);
7047 break;
7048 case KVM_SEV_GUEST_STATUS:
7049 r = sev_guest_status(kvm, &sev_cmd);
7050 break;
7051 case KVM_SEV_DBG_DECRYPT:
7052 r = sev_dbg_crypt(kvm, &sev_cmd, true);
7053 break;
7054 case KVM_SEV_DBG_ENCRYPT:
7055 r = sev_dbg_crypt(kvm, &sev_cmd, false);
7056 break;
7057 case KVM_SEV_LAUNCH_SECRET:
7058 r = sev_launch_secret(kvm, &sev_cmd);
7059 break;
7060 default:
7061 r = -EINVAL;
7062 goto out;
7063 }
7064
7065 if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd)))
7066 r = -EFAULT;
7067
7068 out:
7069 mutex_unlock(&kvm->lock);
7070 return r;
7071 }
7072
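/*
 * Pin a userspace range that will back encrypted guest memory, flush its
 * cache lines, and track the region on sev->regions_list so it can be
 * unpinned when the VM is destroyed.
 */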
7073 static int svm_register_enc_region(struct kvm *kvm,
7074 struct kvm_enc_region *range)
7075 {
7076 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
7077 struct enc_region *region;
7078 int ret = 0;
7079
7080 if (!sev_guest(kvm))
7081 return -ENOTTY;
7082
7083 if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
7084 return -EINVAL;
7085
7086 region = kzalloc(sizeof(*region), GFP_KERNEL_ACCOUNT);
7087 if (!region)
7088 return -ENOMEM;
7089
7090	region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
7091 if (!region->pages) {
7092 ret = -ENOMEM;
7093 goto e_free;
7094 }
7095
7096
7097
7098
7099
7100
7101
7102 sev_clflush_pages(region->pages, region->npages);
7103
7104 region->uaddr = range->addr;
7105 region->size = range->size;
7106
7107 mutex_lock(&kvm->lock);
7108	list_add_tail(&region->list, &sev->regions_list);
7109 mutex_unlock(&kvm->lock);
7110
7111 return ret;
7112
7113 e_free:
7114 kfree(region);
7115 return ret;
7116 }
7117
7118 static struct enc_region *
7119 find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
7120 {
7121 struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
7122 struct list_head *head = &sev->regions_list;
7123 struct enc_region *i;
7124
7125 list_for_each_entry(i, head, list) {
7126 if (i->uaddr == range->addr &&
7127 i->size == range->size)
7128 return i;
7129 }
7130
7131 return NULL;
7132 }
7133
7134
7135 static int svm_unregister_enc_region(struct kvm *kvm,
7136 struct kvm_enc_region *range)
7137 {
7138 struct enc_region *region;
7139 int ret;
7140
7141 mutex_lock(&kvm->lock);
7142
7143 if (!sev_guest(kvm)) {
7144 ret = -ENOTTY;
7145 goto failed;
7146 }
7147
7148 region = find_enc_region(kvm, range);
7149 if (!region) {
7150 ret = -EINVAL;
7151 goto failed;
7152 }
7153
7154 __unregister_enc_region_locked(kvm, region);
7155
7156 mutex_unlock(&kvm->lock);
7157 return 0;
7158
7159 failed:
7160 mutex_unlock(&kvm->lock);
7161 return ret;
7162 }
7163
7164 static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
7165 {
7166 unsigned long cr4 = kvm_read_cr4(vcpu);
7167 bool smep = cr4 & X86_CR4_SMEP;
7168 bool smap = cr4 & X86_CR4_SMAP;
7169 bool is_user = svm_get_cpl(vcpu) == 3;
7170
/*
 * Errata 1096 workaround: on an NPF taken with CR4.SMAP=1 and either
 * CR4.SMEP=0 or the access coming from user mode, DecodeAssist may not
 * provide the faulting instruction bytes. For a normal guest, report that
 * the instruction should be re-fetched and emulated. For an SEV guest the
 * instruction cannot be fetched from encrypted memory, so log the erratum
 * and shut the guest down with a triple fault instead.
 */
7211 if (smap && (!smep || is_user)) {
7212 if (!sev_guest(vcpu->kvm))
7213 return true;
7214
7215 pr_err_ratelimited("KVM: SEV Guest triggered AMD Erratum 1096\n");
7216 kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
7217 }
7218
7219 return false;
7220 }
7221
7222 static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
7223 {
7224 struct vcpu_svm *svm = to_svm(vcpu);
7225
/*
 * An INIT signal is reported as blocked while GIF is clear or while the
 * INIT intercept is set in the active VMCB, so it does not immediately
 * reset this vCPU.
 */
7233 return !gif_set(svm) ||
7234 (svm->vmcb->control.intercept & (1ULL << INTERCEPT_INIT));
7235 }
7236
7237 static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
7238 .cpu_has_kvm_support = has_svm,
7239 .disabled_by_bios = is_disabled,
7240 .hardware_setup = svm_hardware_setup,
7241 .hardware_unsetup = svm_hardware_unsetup,
7242 .check_processor_compatibility = svm_check_processor_compat,
7243 .hardware_enable = svm_hardware_enable,
7244 .hardware_disable = svm_hardware_disable,
7245 .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
7246 .has_emulated_msr = svm_has_emulated_msr,
7247
7248 .vcpu_create = svm_create_vcpu,
7249 .vcpu_free = svm_free_vcpu,
7250 .vcpu_reset = svm_vcpu_reset,
7251
7252 .vm_alloc = svm_vm_alloc,
7253 .vm_free = svm_vm_free,
7254 .vm_init = avic_vm_init,
7255 .vm_destroy = svm_vm_destroy,
7256
7257 .prepare_guest_switch = svm_prepare_guest_switch,
7258 .vcpu_load = svm_vcpu_load,
7259 .vcpu_put = svm_vcpu_put,
7260 .vcpu_blocking = svm_vcpu_blocking,
7261 .vcpu_unblocking = svm_vcpu_unblocking,
7262
7263 .update_bp_intercept = update_bp_intercept,
7264 .get_msr_feature = svm_get_msr_feature,
7265 .get_msr = svm_get_msr,
7266 .set_msr = svm_set_msr,
7267 .get_segment_base = svm_get_segment_base,
7268 .get_segment = svm_get_segment,
7269 .set_segment = svm_set_segment,
7270 .get_cpl = svm_get_cpl,
7271 .get_cs_db_l_bits = kvm_get_cs_db_l_bits,
7272 .decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
7273 .decache_cr3 = svm_decache_cr3,
7274 .decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
7275 .set_cr0 = svm_set_cr0,
7276 .set_cr3 = svm_set_cr3,
7277 .set_cr4 = svm_set_cr4,
7278 .set_efer = svm_set_efer,
7279 .get_idt = svm_get_idt,
7280 .set_idt = svm_set_idt,
7281 .get_gdt = svm_get_gdt,
7282 .set_gdt = svm_set_gdt,
7283 .get_dr6 = svm_get_dr6,
7284 .set_dr6 = svm_set_dr6,
7285 .set_dr7 = svm_set_dr7,
7286 .sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
7287 .cache_reg = svm_cache_reg,
7288 .get_rflags = svm_get_rflags,
7289 .set_rflags = svm_set_rflags,
7290
7291 .tlb_flush = svm_flush_tlb,
7292 .tlb_flush_gva = svm_flush_tlb_gva,
7293
7294 .run = svm_vcpu_run,
7295 .handle_exit = handle_exit,
7296 .skip_emulated_instruction = skip_emulated_instruction,
7297 .set_interrupt_shadow = svm_set_interrupt_shadow,
7298 .get_interrupt_shadow = svm_get_interrupt_shadow,
7299 .patch_hypercall = svm_patch_hypercall,
7300 .set_irq = svm_set_irq,
7301 .set_nmi = svm_inject_nmi,
7302 .queue_exception = svm_queue_exception,
7303 .cancel_injection = svm_cancel_injection,
7304 .interrupt_allowed = svm_interrupt_allowed,
7305 .nmi_allowed = svm_nmi_allowed,
7306 .get_nmi_mask = svm_get_nmi_mask,
7307 .set_nmi_mask = svm_set_nmi_mask,
7308 .enable_nmi_window = enable_nmi_window,
7309 .enable_irq_window = enable_irq_window,
7310 .update_cr8_intercept = update_cr8_intercept,
7311 .set_virtual_apic_mode = svm_set_virtual_apic_mode,
7312 .get_enable_apicv = svm_get_enable_apicv,
7313 .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
7314 .load_eoi_exitmap = svm_load_eoi_exitmap,
7315 .hwapic_irr_update = svm_hwapic_irr_update,
7316 .hwapic_isr_update = svm_hwapic_isr_update,
7317 .sync_pir_to_irr = kvm_lapic_find_highest_irr,
7318 .apicv_post_state_restore = avic_post_state_restore,
7319
7320 .set_tss_addr = svm_set_tss_addr,
7321 .set_identity_map_addr = svm_set_identity_map_addr,
7322 .get_tdp_level = get_npt_level,
7323 .get_mt_mask = svm_get_mt_mask,
7324
7325 .get_exit_info = svm_get_exit_info,
7326
7327 .get_lpage_level = svm_get_lpage_level,
7328
7329 .cpuid_update = svm_cpuid_update,
7330
7331 .rdtscp_supported = svm_rdtscp_supported,
7332 .invpcid_supported = svm_invpcid_supported,
7333 .mpx_supported = svm_mpx_supported,
7334 .xsaves_supported = svm_xsaves_supported,
7335 .umip_emulated = svm_umip_emulated,
7336 .pt_supported = svm_pt_supported,
7337 .pku_supported = svm_pku_supported,
7338
7339 .set_supported_cpuid = svm_set_supported_cpuid,
7340
7341 .has_wbinvd_exit = svm_has_wbinvd_exit,
7342
7343 .read_l1_tsc_offset = svm_read_l1_tsc_offset,
7344 .write_l1_tsc_offset = svm_write_l1_tsc_offset,
7345
7346 .set_tdp_cr3 = set_tdp_cr3,
7347
7348 .check_intercept = svm_check_intercept,
7349 .handle_exit_irqoff = svm_handle_exit_irqoff,
7350
7351 .request_immediate_exit = __kvm_request_immediate_exit,
7352
7353 .sched_in = svm_sched_in,
7354
7355 .pmu_ops = &amd_pmu_ops,
7356 .deliver_posted_interrupt = svm_deliver_avic_intr,
7357 .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt,
7358 .update_pi_irte = svm_update_pi_irte,
7359 .setup_mce = svm_setup_mce,
7360
7361 .smi_allowed = svm_smi_allowed,
7362 .pre_enter_smm = svm_pre_enter_smm,
7363 .pre_leave_smm = svm_pre_leave_smm,
7364 .enable_smi_window = enable_smi_window,
7365
7366 .mem_enc_op = svm_mem_enc_op,
7367 .mem_enc_reg_region = svm_register_enc_region,
7368 .mem_enc_unreg_region = svm_unregister_enc_region,
7369
7370 .nested_enable_evmcs = NULL,
7371 .nested_get_evmcs_version = NULL,
7372
7373 .need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
7374
7375 .apic_init_signal_blocked = svm_apic_init_signal_blocked,
7376 };
7377
7378 static int __init svm_init(void)
7379 {
7380 return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
7381 __alignof__(struct vcpu_svm), THIS_MODULE);
7382 }
7383
7384 static void __exit svm_exit(void)
7385 {
7386 kvm_exit();
7387 }
7388
7389 module_init(svm_init)
7390 module_exit(svm_exit)