/******************************************************************************
 * arch-x86_32.h
 *
 * Guest OS interface to x86 Xen.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Copyright (c) 2004-2006, K A Fraser
 */

#ifndef _ASM_X86_XEN_INTERFACE_H
#define _ASM_X86_XEN_INTERFACE_H

/*
 * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field
 * in a struct in memory.
 * XEN_GUEST_HANDLE_PARAM represents a guest pointer, when passed as a
 * hypercall argument.
 * XEN_GUEST_HANDLE_PARAM and XEN_GUEST_HANDLE are the same on X86 but
 * they might not be on other architectures.
 *
 * __DEFINE_GUEST_HANDLE(name, type) declares the typedef
 * __guest_handle_<name>.  With __XEN__ defined the handle is a struct
 * wrapping a 'type *' in member 'p'; otherwise it is a plain 'type *'.
 */
#ifdef __XEN__
#define __DEFINE_GUEST_HANDLE(name, type) \
	typedef struct { type *p; } __guest_handle_ ## name
#else
#define __DEFINE_GUEST_HANDLE(name, type) \
	typedef type * __guest_handle_ ## name
#endif

/*
 * Convenience wrappers:
 *  DEFINE_GUEST_HANDLE_STRUCT(name) - handle to 'struct name'.
 *  DEFINE_GUEST_HANDLE(name)        - handle to the type called 'name'.
 *  GUEST_HANDLE(name)               - spells the handle type for 'name'.
 */
#define DEFINE_GUEST_HANDLE_STRUCT(name) \
	__DEFINE_GUEST_HANDLE(name, struct name)
#define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name)
#define GUEST_HANDLE(name)        __guest_handle_ ## name

/*
 * set_xen_guest_handle(hnd, val): store pointer 'val' into handle 'hnd'.
 *
 * The __XEN__ variants assign through the struct member 'p'; the others
 * assign to the handle directly, matching the two typedef shapes above.
 *
 * On __i386__, if the handle lvalue occupies 8 bytes the whole 64-bit slot
 * is zeroed before the pointer is stored.
 * NOTE(review): presumably this clears the upper half that a 32-bit pointer
 * store would leave untouched -- confirm against the users of 8-byte handles.
 *
 * No definition is provided when neither __i386__ nor __x86_64__ is defined.
 */
#ifdef __XEN__
#if defined(__i386__)
#define set_xen_guest_handle(hnd, val)			\
	do {						\
		if (sizeof(hnd) == 8)			\
			*(uint64_t *)&(hnd) = 0;	\
		(hnd).p = val;				\
	} while (0)
#elif defined(__x86_64__)
#define set_xen_guest_handle(hnd, val)	do { (hnd).p = val; } while (0)
#endif
#else
#if defined(__i386__)
#define set_xen_guest_handle(hnd, val)			\
	do {						\
		if (sizeof(hnd) == 8)			\
			*(uint64_t *)&(hnd) = 0;	\
		(hnd) = val;				\
	} while (0)
#elif defined(__x86_64__)
#define set_xen_guest_handle(hnd, val)	do { (hnd) = val; } while (0)
#endif
#endif

#ifndef __ASSEMBLY__
/* Explicitly size integers that represent pfns in the public interface
 * with Xen so that on ARM we can have one ABI that works for 32 and 64
 * bit guests. */
/*
 * Each PRI_* macro is the printf length/conversion suffix paired with the
 * typedef directly above it.  Note that PRI_xen_long is "lx" although
 * xen_long_t is a signed long.
 */
typedef unsigned long xen_pfn_t;
#define PRI_xen_pfn "lx"
typedef unsigned long xen_ulong_t;
#define PRI_xen_ulong "lx"
typedef long xen_long_t;
#define PRI_xen_long "lx"

/* Guest handles for primitive C types. */
__DEFINE_GUEST_HANDLE(uchar, unsigned char);
__DEFINE_GUEST_HANDLE(uint,  unsigned int);
DEFINE_GUEST_HANDLE(char);
DEFINE_GUEST_HANDLE(int);
DEFINE_GUEST_HANDLE(void);
DEFINE_GUEST_HANDLE(uint64_t);
DEFINE_GUEST_HANDLE(uint32_t);
DEFINE_GUEST_HANDLE(xen_pfn_t);
DEFINE_GUEST_HANDLE(xen_ulong_t);
#endif

/*
 * Address-space constants.  mk_unsigned_long() and the double-underscore
 * constants it wraps are not defined in this header; they must be provided
 * before these macros are expanded (presumably by the per-arch interface
 * header included further down -- confirm).
 */
#ifndef HYPERVISOR_VIRT_START
#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
#endif

#define MACH2PHYS_VIRT_START  mk_unsigned_long(__MACH2PHYS_VIRT_START)
#define MACH2PHYS_VIRT_END    mk_unsigned_long(__MACH2PHYS_VIRT_END)
/* Size of the [START, END) range shifted right by __MACH2PHYS_SHIFT. */
#define MACH2PHYS_NR_ENTRIES  ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>__MACH2PHYS_SHIFT)

/* Maximum number of virtual CPUs in multi-processor guests. */
#define MAX_VIRT_CPUS 32

/*
 * SEGMENT DESCRIPTOR TABLES
 */
/*
 * A number of GDT entries are reserved by Xen. These are not situated at the
 * start of the GDT because some stupid OSes export hard-coded selector values
 * in their ABI. These hard-coded values are always near the start of the GDT,
 * so Xen places itself out of the way, at the far end of the GDT.
 *
 * NB The LDT is set using the MMUEXT_SET_LDT op of HYPERVISOR_mmuext_op
 */
/*
 * The byte offset is derived from the page index using 4096, and the entry
 * index from the byte offset using 8 (presumably the page size and the
 * descriptor size in bytes, respectively).
 */
#define FIRST_RESERVED_GDT_PAGE  14
#define FIRST_RESERVED_GDT_BYTE  (FIRST_RESERVED_GDT_PAGE * 4096)
#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8)

/*
 * Send an array of these to HYPERVISOR_set_trap_table().
 * Terminate the array with a sentinel entry, with traps[].address==0.
 * The privilege level specifies which modes may enter a trap via a software
 * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate
 * privilege levels as follows:
 *  Level == 0: No one may enter
 *  Level == 1: Kernel may enter
 *  Level == 2: Kernel may enter
 *  Level == 3: Everyone may enter
 */
/*
 * Accessors for trap_info.flags; _ti is a pointer to struct trap_info.
 * The getters mask with 3 (bits 0-1) and 4 (bit 2).  The setters only OR
 * bits in: they neither clear a previously stored value nor mask _dpl, so
 * callers are expected to start from a zeroed flags field and pass a DPL
 * in the range 0-3.
 */
#define TI_GET_DPL(_ti)		((_ti)->flags & 3)
#define TI_GET_IF(_ti)		((_ti)->flags & 4)
#define TI_SET_DPL(_ti, _dpl)	((_ti)->flags |= (_dpl))
#define TI_SET_IF(_ti, _if)	((_ti)->flags |= ((!!(_if))<<2))

#ifndef __ASSEMBLY__
/* One virtual IDT entry, as passed to HYPERVISOR_set_trap_table(). */
struct trap_info {
	uint8_t       vector;  /* exception vector                              */
	uint8_t       flags;   /* bits 0-1: privilege level (TI_GET_DPL);
				  bit 2: clear event enable? (TI_GET_IF)        */
	uint16_t      cs;      /* code selector                                 */
	unsigned long address; /* code offset                                   */
};
DEFINE_GUEST_HANDLE_STRUCT(trap_info);

/* Architecture-specific part of the shared info area (p2m anchors, NMI). */
struct arch_shared_info {
	/*
	 * Number of valid entries in the p2m table(s) anchored at
	 * pfn_to_mfn_frame_list_list and/or p2m_vaddr.
	 */
	unsigned long max_pfn;
	/*
	 * Frame containing list of mfns containing list of mfns containing p2m.
	 * A value of 0 indicates it has not yet been set up, ~0 indicates it
	 * has been set to invalid e.g. due to the p2m being too large for the
	 * 3-level p2m tree. In this case the linear mapper p2m list anchored
	 * at p2m_vaddr is to be used.
	 */
	xen_pfn_t pfn_to_mfn_frame_list_list;
	unsigned long nmi_reason;
	/*
	 * Following three fields are valid if p2m_cr3 contains a value
	 * different from 0.
	 * p2m_cr3 is the root of the address space where p2m_vaddr is valid.
	 * p2m_cr3 is in the same format as a cr3 value in the vcpu register
	 * state and holds the folded machine frame number (via xen_pfn_to_cr3)
	 * of a L3 or L4 page table.
	 * p2m_vaddr holds the virtual address of the linear p2m list. All
	 * entries in the range [0...max_pfn[ are accessible via this pointer.
	 * p2m_generation will be incremented by the guest before and after each
	 * change of the mappings of the p2m list. p2m_generation starts at 0
	 * and a value with the least significant bit set indicates that a
	 * mapping update is in progress. This allows guest external software
	 * (e.g. in Dom0) to verify that read mappings are consistent and
	 * whether they have changed since the last check.
	 * Modifying a p2m element in the linear p2m list is allowed via an
	 * atomic write only.
	 */
	unsigned long p2m_cr3;		/* cr3 value of the p2m address space */
	unsigned long p2m_vaddr;	/* virtual address of the p2m list */
	unsigned long p2m_generation;	/* generation count of p2m mapping */
};
#endif	/* !__ASSEMBLY__ */

/* Pull in the word-size specific part of the interface. */
#ifdef CONFIG_X86_32
#include <asm/xen/interface_32.h>
#else
#include <asm/xen/interface_64.h>
#endif

#include <asm/pvclock-abi.h>

#ifndef __ASSEMBLY__
/*
 * The following is all CPU context. Note that the fpu_ctxt block is filled
 * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
 *
 * Also note that when calling DOMCTL_setvcpucontext and VCPU_initialise
 * for HVM and PVH guests, not all information in this structure is updated:
 *
 * - For HVM guests, the structures read include: fpu_ctxt (if
 * VGCF_I387_VALID is set), flags, user_regs, debugreg[*]
 *
 * - PVH guests are the same as HVM guests, but additionally use ctrlreg[3] to
 * set cr3. All other fields not used should be set to 0.
 */
struct vcpu_guest_context {
	/* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
	struct { char x[512]; } fpu_ctxt;       /* User-level FPU registers     */
	/*
	 * Bit values for 'flags' below.  The upper-case VGCF_I387_VALID and
	 * VGCF_IN_KERNEL have the same values as the lower-case
	 * VGCF_i387_valid and VGCF_in_kernel.  Each _VGCF_* macro is a bit
	 * index and the matching VGCF_* macro is the corresponding mask.
	 */
#define VGCF_I387_VALID (1<<0)
#define VGCF_IN_KERNEL  (1<<2)
#define _VGCF_i387_valid               0
#define VGCF_i387_valid                (1<<_VGCF_i387_valid)
#define _VGCF_in_kernel                2
#define VGCF_in_kernel                 (1<<_VGCF_in_kernel)
#define _VGCF_failsafe_disables_events 3
#define VGCF_failsafe_disables_events  (1<<_VGCF_failsafe_disables_events)
#define _VGCF_syscall_disables_events  4
#define VGCF_syscall_disables_events   (1<<_VGCF_syscall_disables_events)
#define _VGCF_online                   5
#define VGCF_online                    (1<<_VGCF_online)
	unsigned long flags;                    /* VGCF_* flags                 */
	struct cpu_user_regs user_regs;         /* User-level CPU registers     */
	struct trap_info trap_ctxt[256];        /* Virtual IDT                  */
	unsigned long ldt_base, ldt_ents;       /* LDT (linear address, # ents) */
	unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
	unsigned long kernel_ss, kernel_sp;     /* Virtual TSS (only SS1/SP1)   */
	/* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
	unsigned long ctrlreg[8];               /* CR0-CR7 (control registers)  */
	unsigned long debugreg[8];              /* DB0-DB7 (debug registers)    */
	/*
	 * The callback fields differ by word size: the __i386__ layout carries
	 * CS:EIP pairs, the other layout carries addresses only and adds a
	 * syscall callback.
	 */
#ifdef __i386__
	unsigned long event_callback_cs;        /* CS:EIP of event callback     */
	unsigned long event_callback_eip;
	unsigned long failsafe_callback_cs;     /* CS:EIP of failsafe callback  */
	unsigned long failsafe_callback_eip;
#else
	unsigned long event_callback_eip;
	unsigned long failsafe_callback_eip;
	unsigned long syscall_callback_eip;
#endif
	unsigned long vm_assist;                /* VMASST_TYPE_* bitmap */
#ifdef __x86_64__
	/* Segment base addresses. */
	uint64_t      fs_base;
	uint64_t      gs_base_kernel;
	uint64_t      gs_base_user;
#endif
};
DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context);

/* AMD PMU registers and structures */
struct xen_pmu_amd_ctxt {
	/*
	 * Offsets to counter and control MSRs (relative to xen_pmu_arch.c.amd).
	 * For PV(H) guests these fields are RO.
	 */
	uint32_t counters;
	uint32_t ctrls;

	/*
	 * Counter MSRs
	 *
	 * Trailing variable-length bank: a C99 flexible array member when
	 * available, otherwise a GNU zero-length array.  With neither, no
	 * 'regs' member is declared at all.
	 */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
	uint64_t regs[];
#elif defined(__GNUC__)
	uint64_t regs[0];
#endif
};

/* Intel PMU registers and structures */
struct xen_pmu_cntr_pair {
	uint64_t counter;
	uint64_t control;
};

struct xen_pmu_intel_ctxt {
	/*
	 * Offsets to fixed and architectural counter MSRs (relative to
	 * xen_pmu_arch.c.intel).
	 * For PV(H) guests these fields are RO.
	 */
	uint32_t fixed_counters;
	uint32_t arch_counters;

	/* PMU registers */
	uint64_t global_ctrl;
	uint64_t global_ovf_ctrl;
	uint64_t global_status;
	uint64_t fixed_ctrl;
	uint64_t ds_area;
	uint64_t pebs_enable;
	uint64_t debugctl;

	/*
	 * Fixed and architectural counter MSRs
	 *
	 * Same trailing-array scheme as xen_pmu_amd_ctxt.regs.
	 */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
	uint64_t regs[];
#elif defined(__GNUC__)
	uint64_t regs[0];
#endif
};

/* Sampled domain's registers */
struct xen_pmu_regs {
	uint64_t ip;
	uint64_t sp;
	uint64_t flags;
	uint16_t cs;
	uint16_t ss;
	uint8_t cpl;
	uint8_t pad[3];
};

/* PMU flags */
#define PMU_CACHED	   (1<<0) /* PMU MSRs are cached in the context */
#define PMU_SAMPLE_USER	   (1<<1) /* Sample is from user or kernel mode */
#define PMU_SAMPLE_REAL	   (1<<2) /* Sample is from realmode */
#define PMU_SAMPLE_PV	   (1<<3) /* Sample from a PV guest */

/*
 * Architecture-specific information describing state of the processor at
 * the time of PMU interrupt.
 * Fields of this structure marked as RW for guest should only be written by
 * the guest when PMU_CACHED bit in pmu_flags is set (which is done by the
 * hypervisor during PMU interrupt). Hypervisor will read updated data in
 * XENPMU_flush hypercall and clear PMU_CACHED bit.
 */
struct xen_pmu_arch {
	union {
		/*
		 * Processor's registers at the time of interrupt.
		 * WO for hypervisor, RO for guests.
		 */
		struct xen_pmu_regs regs;
		/*
		 * Padding for adding new registers to xen_pmu_regs in
		 * the future
		 */
#define XENPMU_REGS_PAD_SZ  64
		uint8_t pad[XENPMU_REGS_PAD_SZ];
	} r;

	/* WO for hypervisor, RO for guest */
	uint64_t pmu_flags;

	/*
	 * APIC LVTPC register.
	 * RW for both hypervisor and guest.
	 * Only APIC_LVT_MASKED bit is loaded by the hypervisor into hardware
	 * during XENPMU_flush or XENPMU_lvtpc_set.
	 */
	union {
		uint32_t lapic_lvtpc;
		uint64_t pad;
	} l;

	/*
	 * Vendor-specific PMU registers.
	 * RW for both hypervisor and guest (see exceptions above).
	 * Guest's updates to this field are verified and then loaded by the
	 * hypervisor into hardware during XENPMU_flush
	 */
	union {
		struct xen_pmu_amd_ctxt amd;
		struct xen_pmu_intel_ctxt intel;

		/*
		 * Padding for contexts (fixed parts only, does not include
		 * MSR banks that are specified by offsets)
		 */
#define XENPMU_CTXT_PAD_SZ  128
		uint8_t pad[XENPMU_CTXT_PAD_SZ];
	} c;
};

#endif	/* !__ASSEMBLY__ */

/*
 * Prefix forces emulation of some non-trapping instructions.
 * Currently only CPUID.
 *
 * The assembly variant expands to a bare directive; the C variant expands to
 * a string literal for pasting into an inline-asm template.  The last three
 * prefix bytes (0x78,0x65,0x6e) are ASCII "xen".
 */
#ifdef __ASSEMBLY__
#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ;
#define XEN_CPUID          XEN_EMULATE_PREFIX cpuid
#else
#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; "
#define XEN_CPUID          XEN_EMULATE_PREFIX "cpuid"
#endif

#endif /* _ASM_X86_XEN_INTERFACE_H */