1/******************************************************************************
2 * arch-x86_32.h
3 *
4 * Guest OS interface to x86 Xen.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Copyright (c) 2004-2006, K A Fraser
25 */
26
27#ifndef _ASM_X86_XEN_INTERFACE_H
28#define _ASM_X86_XEN_INTERFACE_H
29
30/*
31 * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field
32 * in a struct in memory.
 * XEN_GUEST_HANDLE_PARAM represents a guest pointer, when passed as a
 * hypercall argument.
35 * XEN_GUEST_HANDLE_PARAM and XEN_GUEST_HANDLE are the same on X86 but
36 * they might not be on other architectures.
37 */
/*
 * Guest handle type constructors.
 *
 * __DEFINE_GUEST_HANDLE(tag, ctype) declares the typedef
 * __guest_handle_<tag> for a pointer to ctype. When __XEN__ is defined the
 * pointer is wrapped in a single-member struct (field 'p'); otherwise the
 * handle is the bare pointer type.
 */
#ifndef __XEN__
#define __DEFINE_GUEST_HANDLE(tag, ctype) \
	typedef ctype *__guest_handle_ ## tag
#else
#define __DEFINE_GUEST_HANDLE(tag, ctype) \
	typedef struct { ctype *p; } __guest_handle_ ## tag
#endif

/* Handle for 'struct tag'; the struct tag doubles as the handle tag. */
#define DEFINE_GUEST_HANDLE_STRUCT(tag) \
	__DEFINE_GUEST_HANDLE(tag, struct tag)
/* Handle for a type whose single-identifier name doubles as the tag. */
#define DEFINE_GUEST_HANDLE(tag)	__DEFINE_GUEST_HANDLE(tag, tag)
/* Names the handle type that was declared for 'tag'. */
#define GUEST_HANDLE(tag)		__guest_handle_ ## tag
50
/*
 * set_xen_guest_handle(handle, ptr): store guest pointer 'ptr' in 'handle'.
 *
 * With __XEN__ defined the store goes through the handle's '.p' member,
 * otherwise the handle itself is assigned. On i386 a handle that is
 * 8 bytes wide is first cleared as a whole 64-bit quantity before the
 * pointer is stored. 'handle' appears several times in the i386 expansion,
 * so pass a plain lvalue. No definition exists for other architectures.
 */
#if defined(__i386__)

#ifdef __XEN__
#define set_xen_guest_handle(handle, ptr)			\
	do {							\
		if (sizeof(handle) == sizeof(uint64_t))		\
			*(uint64_t *)&(handle) = 0;		\
		(handle).p = ptr;				\
	} while (0)
#else
#define set_xen_guest_handle(handle, ptr)			\
	do {							\
		if (sizeof(handle) == sizeof(uint64_t))		\
			*(uint64_t *)&(handle) = 0;		\
		(handle) = ptr;					\
	} while (0)
#endif

#elif defined(__x86_64__)

#ifdef __XEN__
#define set_xen_guest_handle(handle, ptr)	do { (handle).p = ptr; } while (0)
#else
#define set_xen_guest_handle(handle, ptr)	do { (handle) = ptr; } while (0)
#endif

#endif
74
#ifndef __ASSEMBLY__
/*
 * Integer types used for pfns and for Xen's "long" quantities in the public
 * interface. In this header they are plain C long / unsigned long. Each
 * PRI_* macro is the printf length+conversion to use with the matching
 * type; all three are "lx", so xen_long_t values are printed in hex too.
 */
typedef unsigned long xen_pfn_t;
#define PRI_xen_pfn "lx"
typedef unsigned long xen_ulong_t;
#define PRI_xen_ulong "lx"
typedef long xen_long_t;
#define PRI_xen_long "lx"

/*
 * Guest handles for primitive C types.
 * uchar and uint are declared with explicit tags because their type names
 * consist of two tokens and cannot serve as the pasted handle tag.
 */
__DEFINE_GUEST_HANDLE(uchar, unsigned char);
__DEFINE_GUEST_HANDLE(uint,  unsigned int);
DEFINE_GUEST_HANDLE(char);
DEFINE_GUEST_HANDLE(int);
DEFINE_GUEST_HANDLE(void);
DEFINE_GUEST_HANDLE(uint64_t);
DEFINE_GUEST_HANDLE(uint32_t);
DEFINE_GUEST_HANDLE(xen_pfn_t);
DEFINE_GUEST_HANDLE(xen_ulong_t);
#endif
97
/*
 * Address-space constants wrapped by mk_unsigned_long(). The __* values
 * and mk_unsigned_long() itself are defined outside this section.
 * An already existing HYPERVISOR_VIRT_START definition is left alone.
 */
#if !defined(HYPERVISOR_VIRT_START)
#define HYPERVISOR_VIRT_START	mk_unsigned_long(__HYPERVISOR_VIRT_START)
#endif

/* Start and end address of the MACH2PHYS region. */
#define MACH2PHYS_VIRT_START	mk_unsigned_long(__MACH2PHYS_VIRT_START)
#define MACH2PHYS_VIRT_END	mk_unsigned_long(__MACH2PHYS_VIRT_END)
/* Size of that region in bytes, scaled down by __MACH2PHYS_SHIFT. */
#define MACH2PHYS_NR_ENTRIES \
	((MACH2PHYS_VIRT_END - MACH2PHYS_VIRT_START) >> __MACH2PHYS_SHIFT)
105
/* Upper bound on the number of virtual CPUs in a multi-processor guest. */
#define MAX_VIRT_CPUS 32
108
109/*
110 * SEGMENT DESCRIPTOR TABLES
111 */
112/*
113 * A number of GDT entries are reserved by Xen. These are not situated at the
114 * start of the GDT because some stupid OSes export hard-coded selector values
115 * in their ABI. These hard-coded values are always near the start of the GDT,
116 * so Xen places itself out of the way, at the far end of the GDT.
117 *
118 * NB The LDT is set using the MMUEXT_SET_LDT op of HYPERVISOR_mmuext_op
119 */
/*
 * Start of the Xen-reserved part of the GDT, given as a page index and
 * derived from it as a byte offset (4096 bytes per page) and as a
 * descriptor index (8 bytes per entry).
 */
#define FIRST_RESERVED_GDT_PAGE		14
#define FIRST_RESERVED_GDT_BYTE		(4096 * FIRST_RESERVED_GDT_PAGE)
#define FIRST_RESERVED_GDT_ENTRY	(FIRST_RESERVED_GDT_BYTE / 8)
123
124/*
125 * Send an array of these to HYPERVISOR_set_trap_table().
126 * Terminate the array with a sentinel entry, with traps[].address==0.
127 * The privilege level specifies which modes may enter a trap via a software
128 * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate
129 * privilege levels as follows:
130 *  Level == 0: No one may enter
131 *  Level == 1: Kernel may enter
132 *  Level == 2: Kernel may enter
133 *  Level == 3: Everyone may enter
134 */
/*
 * Accessors for the flags byte of a trap_info entry; each one takes a
 * pointer to the entry.
 *   TI_GET_DPL  privilege level held in bits 1:0 (yields 0..3)
 *   TI_GET_IF   bit 2, returned unshifted (yields 0 or 4)
 *   TI_SET_DPL  ORs the given level into flags (the argument is not masked)
 *   TI_SET_IF   ORs in bit 2 when the argument is non-zero
 * Both setters only add bits; neither can clear a bit that is already set.
 */
#define TI_GET_DPL(_entry)		((_entry)->flags & 0x3)
#define TI_GET_IF(_entry)		((_entry)->flags & 0x4)
#define TI_SET_DPL(_entry, _level)	((_entry)->flags |= (_level))
#define TI_SET_IF(_entry, _on)		((_entry)->flags |= ((_on) ? 4 : 0))
139
140#ifndef __ASSEMBLY__
/*
 * One entry of the virtual trap table.
 * The flags byte is decoded by the TI_* accessors: bits 1:0 carry the
 * privilege level and bit 2 (value 4) is the flag read by TI_GET_IF.
 */
struct trap_info {
    uint8_t       vector;  /* exception vector                              */
    uint8_t       flags;   /* 0-3: privilege level; 4: clear event enable?  */
    uint16_t      cs;      /* code selector                                 */
    unsigned long address; /* code offset                                   */
};
/* Declares the guest handle type __guest_handle_trap_info. */
DEFINE_GUEST_HANDLE_STRUCT(trap_info);
148
/*
 * Architecture-specific shared information: p2m bookkeeping plus the NMI
 * reason word. Member order and types are part of the interface with Xen.
 */
struct arch_shared_info {
	/*
	 * Number of valid entries in the p2m table(s) anchored at
	 * pfn_to_mfn_frame_list_list and/or p2m_vaddr.
	 */
	unsigned long max_pfn;
	/*
	 * Frame containing list of mfns containing list of mfns containing p2m.
	 * A value of 0 indicates it has not yet been set up, ~0 indicates it
	 * has been set to invalid e.g. due to the p2m being too large for the
	 * 3-level p2m tree. In this case the linear mapper p2m list anchored
	 * at p2m_vaddr is to be used.
	 */
	xen_pfn_t pfn_to_mfn_frame_list_list;
	/* NOTE(review): encoding of nmi_reason is not described here. */
	unsigned long nmi_reason;
	/*
	 * Following three fields are valid if p2m_cr3 contains a value
	 * different from 0.
	 * p2m_cr3 is the root of the address space where p2m_vaddr is valid.
	 * p2m_cr3 is in the same format as a cr3 value in the vcpu register
	 * state and holds the folded machine frame number (via xen_pfn_to_cr3)
	 * of a L3 or L4 page table.
	 * p2m_vaddr holds the virtual address of the linear p2m list. All
	 * entries in the half-open range [0, max_pfn) are accessible via this
	 * pointer.
	 * p2m_generation will be incremented by the guest before and after each
	 * change of the mappings of the p2m list. p2m_generation starts at 0
	 * and a value with the least significant bit set indicates that a
	 * mapping update is in progress. This allows guest external software
	 * (e.g. in Dom0) to verify that read mappings are consistent and
	 * whether they have changed since the last check.
	 * Modifying a p2m element in the linear p2m list is allowed via an
	 * atomic write only.
	 */
	unsigned long p2m_cr3;		/* cr3 value of the p2m address space */
	unsigned long p2m_vaddr;	/* virtual address of the p2m list */
	unsigned long p2m_generation;	/* generation count of p2m mapping */
};
186#endif	/* !__ASSEMBLY__ */
187
188#ifdef CONFIG_X86_32
189#include <asm/xen/interface_32.h>
190#else
191#include <asm/xen/interface_64.h>
192#endif
193
194#include <asm/pvclock-abi.h>
195
196#ifndef __ASSEMBLY__
197/*
198 * The following is all CPU context. Note that the fpu_ctxt block is filled
199 * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
200 *
201 * Also note that when calling DOMCTL_setvcpucontext and VCPU_initialise
202 * for HVM and PVH guests, not all information in this structure is updated:
203 *
 * - For HVM guests, the structures read include: fpu_ctxt (if
 * VGCF_I387_VALID is set), flags, user_regs, debugreg[*]
206 *
207 * - PVH guests are the same as HVM guests, but additionally use ctrlreg[3] to
208 * set cr3. All other fields not used should be set to 0.
209 */
210struct vcpu_guest_context {
211    /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
212    struct { char x[512]; } fpu_ctxt;       /* User-level FPU registers     */
213#define VGCF_I387_VALID                (1<<0)
214#define VGCF_IN_KERNEL                 (1<<2)
215#define _VGCF_i387_valid               0
216#define VGCF_i387_valid                (1<<_VGCF_i387_valid)
217#define _VGCF_in_kernel                2
218#define VGCF_in_kernel                 (1<<_VGCF_in_kernel)
219#define _VGCF_failsafe_disables_events 3
220#define VGCF_failsafe_disables_events  (1<<_VGCF_failsafe_disables_events)
221#define _VGCF_syscall_disables_events  4
222#define VGCF_syscall_disables_events   (1<<_VGCF_syscall_disables_events)
223#define _VGCF_online                   5
224#define VGCF_online                    (1<<_VGCF_online)
225    unsigned long flags;                    /* VGCF_* flags                 */
226    struct cpu_user_regs user_regs;         /* User-level CPU registers     */
227    struct trap_info trap_ctxt[256];        /* Virtual IDT                  */
228    unsigned long ldt_base, ldt_ents;       /* LDT (linear address, # ents) */
229    unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
230    unsigned long kernel_ss, kernel_sp;     /* Virtual TSS (only SS1/SP1)   */
231    /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
232    unsigned long ctrlreg[8];               /* CR0-CR7 (control registers)  */
233    unsigned long debugreg[8];              /* DB0-DB7 (debug registers)    */
234#ifdef __i386__
235    unsigned long event_callback_cs;        /* CS:EIP of event callback     */
236    unsigned long event_callback_eip;
237    unsigned long failsafe_callback_cs;     /* CS:EIP of failsafe callback  */
238    unsigned long failsafe_callback_eip;
239#else
240    unsigned long event_callback_eip;
241    unsigned long failsafe_callback_eip;
242    unsigned long syscall_callback_eip;
243#endif
244    unsigned long vm_assist;                /* VMASST_TYPE_* bitmap */
245#ifdef __x86_64__
246    /* Segment base addresses. */
247    uint64_t      fs_base;
248    uint64_t      gs_base_kernel;
249    uint64_t      gs_base_user;
250#endif
251};
252DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context);
253
/*
 * AMD PMU registers and structures.
 *
 * Fixed header followed by a variable-length bank of 64-bit registers.
 * The bank is a C99 flexible array member when the compiler reports
 * C99 or later, a GNU zero-length array otherwise under GCC, and is not
 * declared at all for any other compiler.
 */
struct xen_pmu_amd_ctxt {
	/*
	 * Offsets to counter and control MSRs (relative to xen_pmu_arch.c.amd).
	 * For PV(H) guests these fields are RO.
	 */
	uint32_t counters;
	uint32_t ctrls;

	/* Counter MSRs */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
	uint64_t regs[];
#elif defined(__GNUC__)
	uint64_t regs[0];
#endif
};
270
/* Intel PMU registers and structures */
/* A counter value together with its control value, 64 bits each. */
struct xen_pmu_cntr_pair {
	uint64_t counter;
	uint64_t control;
};
276
/*
 * Intel PMU context: two offsets, a fixed set of 64-bit PMU registers and
 * a trailing variable-length register bank. The bank is a C99 flexible
 * array member when the compiler reports C99 or later, a GNU zero-length
 * array otherwise under GCC, and is not declared for any other compiler.
 */
struct xen_pmu_intel_ctxt {
	/*
	 * Offsets to fixed and architectural counter MSRs (relative to
	 * xen_pmu_arch.c.intel).
	 * For PV(H) guests these fields are RO.
	 */
	uint32_t fixed_counters;
	uint32_t arch_counters;

	/* PMU registers */
	uint64_t global_ctrl;
	uint64_t global_ovf_ctrl;
	uint64_t global_status;
	uint64_t fixed_ctrl;
	uint64_t ds_area;
	uint64_t pebs_enable;
	uint64_t debugctl;

	/* Fixed and architectural counter MSRs */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
	uint64_t regs[];
#elif defined(__GNUC__)
	uint64_t regs[0];
#endif
};
302
/*
 * Sampled domain's registers.
 * The trailing pad[3] rounds the declared members up to 32 bytes.
 */
struct xen_pmu_regs {
	uint64_t ip;
	uint64_t sp;
	uint64_t flags;
	uint16_t cs;
	uint16_t ss;
	uint8_t cpl;
	uint8_t pad[3];		/* explicit padding */
};
313
/*
 * PMU flags: single-bit values, bits 0 to 3.
 * PMU_CACHED is the bit kept in xen_pmu_arch.pmu_flags; the PMU_SAMPLE_*
 * bits presumably live in the same word (TODO confirm).
 */
#define PMU_CACHED	   (1<<0) /* PMU MSRs are cached in the context */
#define PMU_SAMPLE_USER	   (1<<1) /* Sample is from user or kernel mode */
#define PMU_SAMPLE_REAL	   (1<<2) /* Sample is from realmode */
#define PMU_SAMPLE_PV	   (1<<3) /* Sample from a PV guest */
319
/*
 * Architecture-specific information describing state of the processor at
 * the time of PMU interrupt.
 * Fields of this structure marked as RW for guest should only be written by
 * the guest when PMU_CACHED bit in pmu_flags is set (which is done by the
 * hypervisor during PMU interrupt). Hypervisor will read updated data in
 * XENPMU_flush hypercall and clear PMU_CACHED bit.
 *
 * Each of the three unions carries a pad member that fixes its minimum
 * size (64 bytes for r, 8 bytes for l, 128 bytes for c).
 */
struct xen_pmu_arch {
	union {
		/*
		 * Processor's registers at the time of interrupt.
		 * WO for hypervisor, RO for guests.
		 */
		struct xen_pmu_regs regs;
		/*
		 * Padding for adding new registers to xen_pmu_regs in
		 * the future
		 */
#define XENPMU_REGS_PAD_SZ  64
		uint8_t pad[XENPMU_REGS_PAD_SZ];
	} r;

	/* WO for hypervisor, RO for guest */
	uint64_t pmu_flags;

	/*
	 * APIC LVTPC register.
	 * RW for both hypervisor and guest.
	 * Only APIC_LVT_MASKED bit is loaded by the hypervisor into hardware
	 * during XENPMU_flush or XENPMU_lvtpc_set.
	 */
	union {
		uint32_t lapic_lvtpc;
		uint64_t pad;	/* widens the union to 64 bits */
	} l;

	/*
	 * Vendor-specific PMU registers.
	 * RW for both hypervisor and guest (see exceptions above).
	 * Guest's updates to this field are verified and then loaded by the
	 * hypervisor into hardware during XENPMU_flush
	 */
	union {
		struct xen_pmu_amd_ctxt amd;
		struct xen_pmu_intel_ctxt intel;

		/*
		 * Padding for contexts (fixed parts only, does not include
		 * MSR banks that are specified by offsets)
		 */
#define XENPMU_CTXT_PAD_SZ  128
		uint8_t pad[XENPMU_CTXT_PAD_SZ];
	} c;
};
375
376#endif	/* !__ASSEMBLY__ */
377
/*
 * Prefix that forces emulation of some non-trapping instructions;
 * currently only CPUID.
 *
 * The prefix is five bytes: 0x0f 0x0b followed by the ASCII characters
 * "xen" (0x78 0x65 0x6e). C code gets it as a string literal for use in
 * inline assembly, assembly sources get the bare directive.
 */
#ifndef __ASSEMBLY__
#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; "
#define XEN_CPUID          XEN_EMULATE_PREFIX "cpuid"
#else
#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ;
#define XEN_CPUID          XEN_EMULATE_PREFIX cpuid
#endif
389
390#endif /* _ASM_X86_XEN_INTERFACE_H */
391