/*
 * Copyright (C) 1994 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * General FPU state handling cleanups
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 * x86-64 work by Andi Kleen 2002
 */

#ifndef _ASM_X86_FPU_INTERNAL_H
#define _ASM_X86_FPU_INTERNAL_H

#include <linux/compat.h>
#include <linux/sched.h>
#include <linux/slab.h>

#include <asm/user.h>
#include <asm/fpu/api.h>
#include <asm/fpu/xstate.h>

/*
 * High level FPU state handling functions:
 */
extern void fpu__activate_curr(struct fpu *fpu);
extern void fpu__activate_fpstate_read(struct fpu *fpu);
extern void fpu__activate_fpstate_write(struct fpu *fpu);
extern void fpu__save(struct fpu *fpu);
extern void fpu__restore(struct fpu *fpu);
extern int  fpu__restore_sig(void __user *buf, int ia32_frame);
extern void fpu__drop(struct fpu *fpu);
extern int  fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu);
extern void fpu__clear(struct fpu *fpu);
extern int  fpu__exception_code(struct fpu *fpu, int trap_nr);
extern int  dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate);

/*
 * Boot time FPU initialization functions:
 */
extern void fpu__init_cpu(void);
extern void fpu__init_system_xstate(void);
extern void fpu__init_cpu_xstate(void);
extern void fpu__init_system(struct cpuinfo_x86 *c);
extern void fpu__init_check_bugs(void);
extern void fpu__resume_cpu(void);

/*
 * Debugging facility:
 */
#ifdef CONFIG_X86_DEBUG_FPU
# define WARN_ON_FPU(x) WARN_ON_ONCE(x)
#else
# define WARN_ON_FPU(x) ({ (void)(x); 0; })
#endif

/*
 * FPU related CPU feature flag helper routines:
 */
static __always_inline __pure bool use_eager_fpu(void)
{
	return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
}

static __always_inline __pure bool use_xsaveopt(void)
{
	return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);
}

static __always_inline __pure bool use_xsave(void)
{
	return static_cpu_has_safe(X86_FEATURE_XSAVE);
}

static __always_inline __pure bool use_fxsr(void)
{
	return static_cpu_has_safe(X86_FEATURE_FXSR);
}

/*
 * fpstate handling functions:
 */

extern union fpregs_state init_fpstate;

extern void fpstate_init(union fpregs_state *state);
#ifdef CONFIG_MATH_EMULATION
extern void fpstate_init_soft(struct swregs_state *soft);
#else
static inline void fpstate_init_soft(struct swregs_state *soft) {}
#endif
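
/*
 * 0x37f is the x87 control word value established by FNINIT: all exceptions
 * masked, 64-bit (extended) precision, round-to-nearest.  MXCSR_DEFAULT
 * plays the same role for the SSE control/status register.
 */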
static inline void fpstate_init_fxstate(struct fxregs_state *fx)
{
	fx->cwd = 0x37f;
	fx->mxcsr = MXCSR_DEFAULT;
}
extern void fpstate_sanitize_xstate(struct fpu *fpu);

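/*
 * user_insn() and check_insn() wrap a single FPU instruction in an exception
 * table fixup and evaluate to 0 on success or -1 if the instruction faulted.
 * user_insn() additionally brackets the access with STAC/CLAC so that the
 * instruction may touch user memory when SMAP is enabled.
 */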
#define user_insn(insn, output, input...)				\
({									\
	int err;							\
	asm volatile(ASM_STAC "\n"					\
		     "1:" #insn "\n\t"					\
		     "2: " ASM_CLAC "\n"				\
		     ".section .fixup,\"ax\"\n"				\
		     "3:  movl $-1,%[err]\n"				\
		     "    jmp  2b\n"					\
		     ".previous\n"					\
		     _ASM_EXTABLE(1b, 3b)				\
		     : [err] "=r" (err), output				\
		     : "0"(0), input);					\
	err;								\
})

#define check_insn(insn, output, input...)				\
({									\
	int err;							\
	asm volatile("1:" #insn "\n\t"					\
		     "2:\n"						\
		     ".section .fixup,\"ax\"\n"				\
		     "3:  movl $-1,%[err]\n"				\
		     "    jmp  2b\n"					\
		     ".previous\n"					\
		     _ASM_EXTABLE(1b, 3b)				\
		     : [err] "=r" (err), output				\
		     : "0"(0), input);					\
	err;								\
})

static inline int copy_fregs_to_user(struct fregs_state __user *fx)
{
	return user_insn(fnsave %[fx]; fwait,  [fx] "=m" (*fx), "m" (*fx));
}

static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
{
	if (config_enabled(CONFIG_X86_32))
		return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
	else if (config_enabled(CONFIG_AS_FXSAVEQ))
		return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));

	/* See comment in copy_fxregs_to_kernel() below. */
	return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
}

static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
{
	int err;

	if (config_enabled(CONFIG_X86_32)) {
		err = check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
	} else {
		if (config_enabled(CONFIG_AS_FXSAVEQ)) {
			err = check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
		} else {
			/* See comment in copy_fxregs_to_kernel() below. */
			err = check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
		}
	}
	/* Copying from a kernel buffer to FPU registers should never fail: */
	WARN_ON_FPU(err);
}

static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
{
	if (config_enabled(CONFIG_X86_32))
		return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
	else if (config_enabled(CONFIG_AS_FXSAVEQ))
		return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));

	/* See comment in copy_fxregs_to_kernel() below. */
	return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
			  "m" (*fx));
}

static inline void copy_kernel_to_fregs(struct fregs_state *fx)
{
	int err = check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));

	WARN_ON_FPU(err);
}

static inline int copy_user_to_fregs(struct fregs_state __user *fx)
{
	return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}

static inline void copy_fxregs_to_kernel(struct fpu *fpu)
{
	if (config_enabled(CONFIG_X86_32))
		asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave));
	else if (config_enabled(CONFIG_AS_FXSAVEQ))
		asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave));
	else {
		/* Using "rex64; fxsave %0" is broken because, if the memory
		 * operand uses any extended registers for addressing, a second
		 * REX prefix will be generated (to the assembler, rex64
		 * followed by semicolon is a separate instruction), and hence
		 * the 64-bitness is lost.
		 *
		 * Using "fxsaveq %0" would be the ideal choice, but is only
		 * supported starting with gas 2.16.
		 *
		 * Using, as a workaround, the properly prefixed form below
		 * isn't accepted by any binutils version so far released,
		 * complaining that the same type of prefix is used twice if
		 * an extended register is needed for addressing (fix submitted
		 * to mainline 2005-11-21).
		 *
		 *  asm volatile("rex64/fxsave %0" : "=m" (fpu->state.fxsave));
		 *
		 * This, however, we can work around by forcing the compiler to
		 * select an addressing mode that doesn't require extended
		 * registers.
		 */
		asm volatile( "rex64/fxsave (%[fx])"
			     : "=m" (fpu->state.fxsave)
			     : [fx] "R" (&fpu->state.fxsave));
	}
}

/* These macros all use (%edi)/(%rdi) as the single memory argument. */
#define XSAVE		".byte " REX_PREFIX "0x0f,0xae,0x27"
#define XSAVEOPT	".byte " REX_PREFIX "0x0f,0xae,0x37"
#define XSAVES		".byte " REX_PREFIX "0x0f,0xc7,0x2f"
#define XRSTOR		".byte " REX_PREFIX "0x0f,0xae,0x2f"
#define XRSTORS		".byte " REX_PREFIX "0x0f,0xc7,0x1f"
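/* The requested-feature bitmask for all of the above is passed in edx:eax. */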

/* xstate instruction fault handler: */
#define xstate_fault(__err)		\
					\
	".section .fixup,\"ax\"\n"	\
					\
	"3:  movl $-2,%[_err]\n"	\
	"    jmp  2b\n"			\
					\
	".previous\n"			\
					\
	_ASM_EXTABLE(1b, 3b)		\
	: [_err] "=r" (__err)
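
/*
 * Note that xstate_fault() supplies the start of the output operand list
 * of the asm statement it is pasted into: callers append their own input
 * operands after it and seed the error value via a "0" (err) input.
 */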

/*
 * This function is called only during boot time when x86 caps are not set
 * up and alternatives cannot be used yet.
 */
static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
{
	u64 mask = -1;
	u32 lmask = mask;
	u32 hmask = mask >> 32;
	int err = 0;

	WARN_ON(system_state != SYSTEM_BOOTING);

	if (boot_cpu_has(X86_FEATURE_XSAVES))
		asm volatile("1:"XSAVES"\n\t"
			"2:\n\t"
			     xstate_fault(err)
			: "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err)
			: "memory");
	else
		asm volatile("1:"XSAVE"\n\t"
			"2:\n\t"
			     xstate_fault(err)
			: "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err)
			: "memory");

	/* We should never fault when copying to a kernel buffer: */
	WARN_ON_FPU(err);
}

/*
 * This function is called only during boot time when x86 caps are not set
 * up and alternatives cannot be used yet.
 */
static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
{
	u64 mask = -1;
	u32 lmask = mask;
	u32 hmask = mask >> 32;
	int err = 0;

	WARN_ON(system_state != SYSTEM_BOOTING);

	if (boot_cpu_has(X86_FEATURE_XSAVES))
		asm volatile("1:"XRSTORS"\n\t"
			"2:\n\t"
			     xstate_fault(err)
			: "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err)
			: "memory");
	else
		asm volatile("1:"XRSTOR"\n\t"
			"2:\n\t"
			     xstate_fault(err)
			: "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err)
			: "memory");

	/* We should never fault when copying from a kernel buffer: */
	WARN_ON_FPU(err);
}

/*
 * Save processor xstate to xsave area.
 */
static inline void copy_xregs_to_kernel(struct xregs_state *xstate)
{
	u64 mask = -1;
	u32 lmask = mask;
	u32 hmask = mask >> 32;
	int err = 0;

	WARN_ON(!alternatives_patched);

	/*
	 * If xsaves is enabled, it replaces xsaveopt because it supports the
	 * compacted format and supervisor states in addition to xsaveopt's
	 * modified optimization.
	 *
	 * Otherwise, if xsaveopt is enabled, it replaces xsave because
	 * xsaveopt supports the modified optimization, which xsave does not.
	 *
	 * If neither xsaves nor xsaveopt is enabled, use plain xsave.
	 */
	alternative_input_2(
		"1:"XSAVE,
		XSAVEOPT,
		X86_FEATURE_XSAVEOPT,
		XSAVES,
		X86_FEATURE_XSAVES,
		[xstate] "D" (xstate), "a" (lmask), "d" (hmask) :
		"memory");
	asm volatile("2:\n\t"
		     xstate_fault(err)
		     : "0" (err)
		     : "memory");

	/* We should never fault when copying to a kernel buffer: */
	WARN_ON_FPU(err);
}

/*
 * Restore processor xstate from xsave area.
 */
static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask)
{
	u32 lmask = mask;
	u32 hmask = mask >> 32;
	int err = 0;

	/*
	 * Use xrstors to restore context if it is enabled. xrstors supports
	 * the compacted format of the xsave area, which xrstor does not.
	 */
	alternative_input(
		"1: " XRSTOR,
		XRSTORS,
		X86_FEATURE_XSAVES,
		"D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask)
		: "memory");

	asm volatile("2:\n"
		     xstate_fault(err)
		     : "0" (err)
		     : "memory");

	/* We should never fault when copying from a kernel buffer: */
	WARN_ON_FPU(err);
}

/*
 * Save xstate to user space xsave area.
 *
 * We don't use the modified-state optimization because xrstor/xrstors
 * might track a different application's state.
 *
 * We don't use the compacted xsave format either, for backward
 * compatibility with old applications that do not understand it.
 */
static inline int copy_xregs_to_user(struct xregs_state __user *buf)
{
	int err;

	/*
	 * Clear the xsave header first, so that reserved fields are
	 * initialized to zero.
	 */
	err = __clear_user(&buf->header, sizeof(buf->header));
	if (unlikely(err))
		return -EFAULT;

	__asm__ __volatile__(ASM_STAC "\n"
			     "1:"XSAVE"\n"
			     "2: " ASM_CLAC "\n"
			     xstate_fault(err)
			     : "D" (buf), "a" (-1), "d" (-1), "0" (err)
			     : "memory");
	return err;
}

/*
 * Restore xstate from user space xsave area.
 */
static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask)
{
	struct xregs_state *xstate = ((__force struct xregs_state *)buf);
	u32 lmask = mask;
	u32 hmask = mask >> 32;
	int err = 0;

	__asm__ __volatile__(ASM_STAC "\n"
			     "1:"XRSTOR"\n"
			     "2: " ASM_CLAC "\n"
			     xstate_fault(err)
			     : "D" (xstate), "a" (lmask), "d" (hmask), "0" (err)
			     : "memory");	/* memory required? */
	return err;
}
/*
 * This must be called with preemption disabled. Returns
 * 'true' if the FPU state is still intact and we can
 * keep registers active.
 *
 * The legacy FNSAVE instruction clears all FPU state
 * unconditionally, so registers are essentially destroyed.
 * Modern FPU state can be kept in registers, if there are
 * no pending FP exceptions.
 */
static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
{
	if (likely(use_xsave())) {
		copy_xregs_to_kernel(&fpu->state.xsave);
		return 1;
	}

	if (likely(use_fxsr())) {
		copy_fxregs_to_kernel(fpu);
		return 1;
	}

	/*
	 * Legacy FPU register saving: FNSAVE always clears the FPU registers,
	 * so we have to mark them inactive:
	 */
	asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));

	return 0;
}

static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate)
{
	if (use_xsave()) {
		copy_kernel_to_xregs(&fpstate->xsave, -1);
	} else {
		if (use_fxsr())
			copy_kernel_to_fxregs(&fpstate->fxsave);
		else
			copy_kernel_to_fregs(&fpstate->fsave);
	}
}

static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
{
	/*
	 * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is
	 * pending. Clear the x87 state here by setting it to fixed values.
	 * The fildl operand just needs to be some variable that is likely to
	 * be in the L1 cache already; fpstate itself is used here.
	 */
	if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
		asm volatile(
			"fnclex\n\t"
			"emms\n\t"
			"fildl %P[addr]"	/* set F?P to defined value */
			: : [addr] "m" (fpstate));
	}

	__copy_kernel_to_fpregs(fpstate);
}

extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);

/*
 * FPU context switch related helper methods:
 */

DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);

/*
 * Must be run with preemption disabled: this clears the fpu_fpregs_owner_ctx
 * on this CPU.
 *
 * This will disable any lazy FPU state restore of the current FPU state,
 * but if the current thread owns the FPU, its state will still be saved
 * at the next context switch.
 */
static inline void __cpu_disable_lazy_restore(unsigned int cpu)
{
	per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;
}

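/*
 * Lazy restore check: the FPU registers on this CPU still hold this
 * context's state if we are both the recorded per-CPU owner and this
 * was the last CPU the context ran on, so a restore from memory can
 * be skipped.
 */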
static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu)
{
	return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
}


/*
 * Wrap lazy FPU TS handling in a 'hw fpregs activation/deactivation'
 * idiom, which is then paired with the sw-flag (fpregs_active) later on:
 */

static inline void __fpregs_activate_hw(void)
{
	if (!use_eager_fpu())
		clts();
}

static inline void __fpregs_deactivate_hw(void)
{
	if (!use_eager_fpu())
		stts();
}

/* Must be paired with an 'stts' (__fpregs_deactivate_hw()) after! */
static inline void __fpregs_deactivate(struct fpu *fpu)
{
	WARN_ON_FPU(!fpu->fpregs_active);

	fpu->fpregs_active = 0;
	this_cpu_write(fpu_fpregs_owner_ctx, NULL);
}

/* Must be paired with a 'clts' (__fpregs_activate_hw()) before! */
static inline void __fpregs_activate(struct fpu *fpu)
{
	WARN_ON_FPU(fpu->fpregs_active);

	fpu->fpregs_active = 1;
	this_cpu_write(fpu_fpregs_owner_ctx, fpu);
}

/*
 * The question "does this thread have fpu access?"
 * is slightly racy, since preemption could come in
 * and revoke it immediately after the test.
 *
 * However, even in that very unlikely scenario,
 * we can just assume we have FPU access - typically
 * to save the FP state - we'll just take a #NM
 * fault and get the FPU access back.
 */
static inline int fpregs_active(void)
{
	return current->thread.fpu.fpregs_active;
}

/*
 * Encapsulate the CR0.TS handling together with the
 * software flag.
 *
 * These generally need preemption protection to work,
 * so try to avoid using them on their own.
 */
static inline void fpregs_activate(struct fpu *fpu)
{
	__fpregs_activate_hw();
	__fpregs_activate(fpu);
}

static inline void fpregs_deactivate(struct fpu *fpu)
{
	__fpregs_deactivate(fpu);
	__fpregs_deactivate_hw();
}

/*
 * FPU state switching for scheduling.
 *
 * This is a two-stage process:
 *
 *  - switch_fpu_prepare() saves the old state and
 *    sets the new state of the CR0.TS bit. This is
 *    done within the context of the old process.
 *
 *  - switch_fpu_finish() restores the new state as
 *    necessary.
 */
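/*
 * A rough sketch of the intended usage from the context switch path
 * (simplified; the real call sites live in the per-arch process.c files):
 *
 *	fpu_switch_t fpu_switch;
 *
 *	fpu_switch = switch_fpu_prepare(&prev->thread.fpu,
 *					&next->thread.fpu, cpu);
 *	... switch stacks, segments, etc. ...
 *	switch_fpu_finish(&next->thread.fpu, fpu_switch);
 */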
typedef struct { int preload; } fpu_switch_t;

static inline fpu_switch_t
switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
{
	fpu_switch_t fpu;

	/*
	 * Pre-load the FPU state if the task has used the FPU before and we
	 * are on an eager-switching (xsave) processor, or if it used the FPU
	 * on more than 5 consecutive context switches.
	 */
	fpu.preload = new_fpu->fpstate_active &&
		      (use_eager_fpu() || new_fpu->counter > 5);

	if (old_fpu->fpregs_active) {
		if (!copy_fpregs_to_fpstate(old_fpu))
			old_fpu->last_cpu = -1;
		else
			old_fpu->last_cpu = cpu;

		/* But leave fpu_fpregs_owner_ctx! */
		old_fpu->fpregs_active = 0;

		/* Don't change CR0.TS if we just switch! */
		if (fpu.preload) {
			new_fpu->counter++;
			__fpregs_activate(new_fpu);
			prefetch(&new_fpu->state);
		} else {
			__fpregs_deactivate_hw();
		}
	} else {
		old_fpu->counter = 0;
		old_fpu->last_cpu = -1;
		if (fpu.preload) {
			new_fpu->counter++;
			if (fpu_want_lazy_restore(new_fpu, cpu))
				fpu.preload = 0;
			else
				prefetch(&new_fpu->state);
			fpregs_activate(new_fpu);
		}
	}
	return fpu;
}

/*
 * Misc helper functions:
 */

/*
 * By the time this gets called, we've already cleared CR0.TS and
 * given the process the FPU if we are going to preload the FPU
 * state - all we need to do is to conditionally restore the register
 * state itself.
 */
static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switch)
{
	if (fpu_switch.preload)
		copy_kernel_to_fpregs(&new_fpu->state);
}

/*
 * Needs to be preemption-safe.
 *
 * NOTE! user_fpu_begin() must be used only immediately before restoring
 * the saved state. It does not do any saving/restoring on its own. In
 * lazy FPU mode it is just an optimization to avoid a #NM exception;
 * the task can lose the FPU right after preempt_enable().
 */
static inline void user_fpu_begin(void)
{
	struct fpu *fpu = &current->thread.fpu;

	preempt_disable();
	if (!fpregs_active())
		fpregs_activate(fpu);
	preempt_enable();
}

/*
 * MXCSR and XCR definitions:
 */

extern unsigned int mxcsr_feature_mask;

#define XCR_XFEATURE_ENABLED_MASK	0x00000000

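/*
 * xgetbv()/xsetbv() read and write the extended control registers (XCRs);
 * index 0 (XCR_XFEATURE_ENABLED_MASK) selects XCR0, the xfeature enable
 * mask.  The instructions are emitted as raw opcode bytes rather than
 * mnemonics, which older assemblers may not recognize.
 */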
static inline u64 xgetbv(u32 index)
{
	u32 eax, edx;

	asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
		     : "=a" (eax), "=d" (edx)
		     : "c" (index));
	return eax + ((u64)edx << 32);
}

static inline void xsetbv(u32 index, u64 value)
{
	u32 eax = value;
	u32 edx = value >> 32;

	asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
		     : : "a" (eax), "d" (edx), "c" (index));
}

#endif /* _ASM_X86_FPU_INTERNAL_H */