1 /*
2  *  Copyright (C) 1995  Linus Torvalds
3  *
4  *  Pentium III FXSR, SSE support
5  *	Gareth Hughes <gareth@valinux.com>, May 2000
6  *
7  *  X86-64 port
8  *	Andi Kleen.
9  *
10  *	CPU hotplug support - ashok.raj@intel.com
11  */
12 
13 /*
 * This file handles the architecture-dependent parts of process handling.
15  */
16 
17 #include <linux/cpu.h>
18 #include <linux/errno.h>
19 #include <linux/sched.h>
20 #include <linux/fs.h>
21 #include <linux/kernel.h>
22 #include <linux/mm.h>
23 #include <linux/elfcore.h>
24 #include <linux/smp.h>
25 #include <linux/slab.h>
26 #include <linux/user.h>
27 #include <linux/interrupt.h>
28 #include <linux/delay.h>
29 #include <linux/module.h>
30 #include <linux/ptrace.h>
31 #include <linux/notifier.h>
32 #include <linux/kprobes.h>
33 #include <linux/kdebug.h>
34 #include <linux/prctl.h>
35 #include <linux/uaccess.h>
36 #include <linux/io.h>
37 #include <linux/ftrace.h>
38 
39 #include <asm/pgtable.h>
40 #include <asm/processor.h>
41 #include <asm/i387.h>
42 #include <asm/fpu-internal.h>
43 #include <asm/mmu_context.h>
44 #include <asm/prctl.h>
45 #include <asm/desc.h>
46 #include <asm/proto.h>
47 #include <asm/ia32.h>
48 #include <asm/idle.h>
49 #include <asm/syscalls.h>
50 #include <asm/debugreg.h>
51 #include <asm/switch_to.h>
52 #include <asm/xen/hypervisor.h>
53 
/*
 * Declared here only; the definition lives outside the code visible here.
 * NOTE(review): presumably the assembly stub a freshly forked task first
 * returns through -- confirm against the entry code.
 */
asmlinkage extern void ret_from_fork(void);

/*
 * Per-CPU unsigned long named rsp_scratch.  It is not referenced by any of
 * the functions visible here.
 * NOTE(review): presumably a scratch slot for the syscall entry path --
 * confirm against the entry code.
 */
__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
57 
58 /* Prints also some state that isn't saved in the pt_regs */
__show_regs(struct pt_regs * regs,int all)59 void __show_regs(struct pt_regs *regs, int all)
60 {
61 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
62 	unsigned long d0, d1, d2, d3, d6, d7;
63 	unsigned int fsindex, gsindex;
64 	unsigned int ds, cs, es;
65 
66 	printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
67 	printk_address(regs->ip);
68 	printk(KERN_DEFAULT "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
69 			regs->sp, regs->flags);
70 	printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
71 	       regs->ax, regs->bx, regs->cx);
72 	printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
73 	       regs->dx, regs->si, regs->di);
74 	printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
75 	       regs->bp, regs->r8, regs->r9);
76 	printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
77 	       regs->r10, regs->r11, regs->r12);
78 	printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
79 	       regs->r13, regs->r14, regs->r15);
80 
81 	asm("movl %%ds,%0" : "=r" (ds));
82 	asm("movl %%cs,%0" : "=r" (cs));
83 	asm("movl %%es,%0" : "=r" (es));
84 	asm("movl %%fs,%0" : "=r" (fsindex));
85 	asm("movl %%gs,%0" : "=r" (gsindex));
86 
87 	rdmsrl(MSR_FS_BASE, fs);
88 	rdmsrl(MSR_GS_BASE, gs);
89 	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
90 
91 	if (!all)
92 		return;
93 
94 	cr0 = read_cr0();
95 	cr2 = read_cr2();
96 	cr3 = read_cr3();
97 	cr4 = __read_cr4();
98 
99 	printk(KERN_DEFAULT "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
100 	       fs, fsindex, gs, gsindex, shadowgs);
101 	printk(KERN_DEFAULT "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
102 			es, cr0);
103 	printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
104 			cr4);
105 
106 	get_debugreg(d0, 0);
107 	get_debugreg(d1, 1);
108 	get_debugreg(d2, 2);
109 	get_debugreg(d3, 3);
110 	get_debugreg(d6, 6);
111 	get_debugreg(d7, 7);
112 
113 	/* Only print out debug registers if they are in their non-default state. */
114 	if ((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
115 	    (d6 == DR6_RESERVED) && (d7 == 0x400))
116 		return;
117 
118 	printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
119 	printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
120 
121 }
122 
release_thread(struct task_struct * dead_task)123 void release_thread(struct task_struct *dead_task)
124 {
125 	if (dead_task->mm) {
126 		if (dead_task->mm->context.ldt) {
127 			pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
128 				dead_task->comm,
129 				dead_task->mm->context.ldt,
130 				dead_task->mm->context.ldt->size);
131 			BUG();
132 		}
133 	}
134 }
135 
/*
 * Fill TLS slot @tls of task @t with a 32-bit, page-granular, usable
 * segment descriptor whose base is @addr and whose limit is 0xfffff.
 */
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc tls_info = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};

	fill_ldt(&t->thread.tls_array[tls], &tls_info);
}
149 
read_32bit_tls(struct task_struct * t,int tls)150 static inline u32 read_32bit_tls(struct task_struct *t, int tls)
151 {
152 	return get_desc_base(&t->thread.tls_array[tls]);
153 }
154 
copy_thread(unsigned long clone_flags,unsigned long sp,unsigned long arg,struct task_struct * p)155 int copy_thread(unsigned long clone_flags, unsigned long sp,
156 		unsigned long arg, struct task_struct *p)
157 {
158 	int err;
159 	struct pt_regs *childregs;
160 	struct task_struct *me = current;
161 
162 	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
163 	childregs = task_pt_regs(p);
164 	p->thread.sp = (unsigned long) childregs;
165 	set_tsk_thread_flag(p, TIF_FORK);
166 	p->thread.io_bitmap_ptr = NULL;
167 
168 	savesegment(gs, p->thread.gsindex);
169 	p->thread.gs = p->thread.gsindex ? 0 : me->thread.gs;
170 	savesegment(fs, p->thread.fsindex);
171 	p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
172 	savesegment(es, p->thread.es);
173 	savesegment(ds, p->thread.ds);
174 	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
175 
176 	if (unlikely(p->flags & PF_KTHREAD)) {
177 		/* kernel thread */
178 		memset(childregs, 0, sizeof(struct pt_regs));
179 		childregs->sp = (unsigned long)childregs;
180 		childregs->ss = __KERNEL_DS;
181 		childregs->bx = sp; /* function */
182 		childregs->bp = arg;
183 		childregs->orig_ax = -1;
184 		childregs->cs = __KERNEL_CS | get_kernel_rpl();
185 		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
186 		return 0;
187 	}
188 	*childregs = *current_pt_regs();
189 
190 	childregs->ax = 0;
191 	if (sp)
192 		childregs->sp = sp;
193 
194 	err = -ENOMEM;
195 	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
196 		p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
197 						  IO_BITMAP_BYTES, GFP_KERNEL);
198 		if (!p->thread.io_bitmap_ptr) {
199 			p->thread.io_bitmap_max = 0;
200 			return -ENOMEM;
201 		}
202 		set_tsk_thread_flag(p, TIF_IO_BITMAP);
203 	}
204 
205 	/*
206 	 * Set a new TLS for the child thread?
207 	 */
208 	if (clone_flags & CLONE_SETTLS) {
209 #ifdef CONFIG_IA32_EMULATION
210 		if (is_ia32_task())
211 			err = do_set_thread_area(p, -1,
212 				(struct user_desc __user *)childregs->si, 0);
213 		else
214 #endif
215 			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
216 		if (err)
217 			goto out;
218 	}
219 	err = 0;
220 out:
221 	if (err && p->thread.io_bitmap_ptr) {
222 		kfree(p->thread.io_bitmap_ptr);
223 		p->thread.io_bitmap_max = 0;
224 	}
225 
226 	return err;
227 }
228 
/*
 * Common tail of start_thread() and start_thread_ia32(): reset the data
 * segment registers of the current CPU and rewrite the frame in @regs.
 *
 * @regs:   register frame to rewrite.
 * @new_ip: instruction pointer stored in the frame.
 * @new_sp: stack pointer stored in the frame.
 * @_cs:    code segment selector stored in the frame.
 * @_ss:    stack segment selector stored in the frame.
 * @_ds:    selector loaded into both DS and ES.
 */
static void
start_thread_common(struct pt_regs *regs, unsigned long new_ip,
		    unsigned long new_sp,
		    unsigned int _cs, unsigned int _ss, unsigned int _ds)
{
	/* FS and GS start out as null selectors; DS and ES take @_ds. */
	loadsegment(fs, 0);
	loadsegment(es, _ds);
	loadsegment(ds, _ds);
	load_gs_index(0);
	regs->ip		= new_ip;
	regs->sp		= new_sp;
	regs->cs		= _cs;
	regs->ss		= _ss;
	/* Flags are reset to just "interrupts enabled". */
	regs->flags		= X86_EFLAGS_IF;
	/*
	 * NOTE(review): presumably makes the return to user mode go through
	 * IRET so the whole rewritten frame is honoured -- confirm against
	 * the force_iret() definition.
	 */
	force_iret();
}
245 
/*
 * Rewrite @regs to begin at @new_ip with stack @new_sp, using __USER_CS
 * for CS, __USER_DS for SS and a null selector for DS/ES.
 */
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    __USER_CS, __USER_DS, 0);
}
252 
#ifdef CONFIG_IA32_EMULATION
/*
 * Compat counterpart of start_thread(): DS, ES and SS all use __USER_DS.
 * The code segment is __USER_CS when the current thread has TIF_X32 set
 * and __USER32_CS otherwise.
 */
void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp)
{
	unsigned int code_sel;

	if (test_thread_flag(TIF_X32))
		code_sel = __USER_CS;
	else
		code_sel = __USER32_CS;

	start_thread_common(regs, new_ip, new_sp,
			    code_sel, __USER_DS, __USER_DS);
}
#endif
262 
/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * Saves the outgoing task's segment state into prev_p->thread, installs
 * the incoming task's TLS entries, segment selectors and FS/GS bases,
 * updates the per-CPU current task, preempt count and kernel stack, and
 * returns prev_p.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 * Function graph tracer not supported either.
 */
__visible __notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
	unsigned fsindex, gsindex;
	fpu_switch_t fpu;

	fpu = switch_fpu_prepare(prev_p, next_p, cpu);

	/* We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, fsindex);
	savesegment(gs, gsindex);

	/*
	 * Load TLS before restoring any segments so that segment loads
	 * reference the correct GDT entries.
	 */
	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.  This
	 * must be done after loading TLS entries in the GDT but before
	 * loading segments that might reference them, and it must
	 * be done before math_state_restore, so the TS bit is up to
	 * date.
	 */
	arch_end_context_switch(next_p);

	/* Switch DS and ES.
	 *
	 * Reading them only returns the selectors, but writing them (if
	 * nonzero) loads the full descriptor from the GDT or LDT.  The
	 * LDT for next is loaded in switch_mm, and the GDT is loaded
	 * above.
	 *
	 * We therefore need to write new values to the segment
	 * registers on every context switch unless both the new and old
	 * values are zero.
	 *
	 * Note that we don't need to do anything for CS and SS, as
	 * those are saved and restored as part of pt_regs.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	/*
	 * Switch FS and GS.
	 *
	 * These are even more complicated than DS and ES: they have
	 * 64-bit bases that are controlled by arch_prctl.  Those bases
	 * only differ from the values in the GDT or LDT if the selector
	 * is 0.
	 *
	 * Loading the segment register resets the hidden base part of
	 * the register to 0 or the value from the GDT / LDT.  If the
	 * next base address is zero, writing 0 to the segment register is
	 * much faster than using wrmsr to explicitly zero the base.
	 *
	 * The thread_struct.fs and thread_struct.gs values are 0
	 * if the fs and gs bases respectively are not overridden
	 * from the values implied by fsindex and gsindex.  They
	 * are nonzero, and store the nonzero base addresses, if
	 * the bases are overridden.
	 *
	 * (fs != 0 && fsindex != 0) || (gs != 0 && gsindex != 0) should
	 * be impossible.
	 *
	 * Therefore we need to reload the segment registers if either
	 * the old or new selector is nonzero, and we need to override
	 * the base address if next thread expects it to be overridden.
	 *
	 * This code is unnecessarily slow in the case where the old and
	 * new indexes are zero and the new base is nonzero -- it will
	 * unnecessarily write 0 to the selector before writing the new
	 * base address.
	 *
	 * Note: This all depends on arch_prctl being the only way that
	 * user code can override the segment base.  Once wrfsbase and
	 * wrgsbase are enabled, most of this code will need to change.
	 */
	if (unlikely(fsindex | next->fsindex | prev->fs)) {
		loadsegment(fs, next->fsindex);

		/*
		 * If user code wrote a nonzero value to FS, then it also
		 * cleared the overridden base address.
		 *
		 * XXX: if user code wrote 0 to FS and cleared the base
		 * address itself, we won't notice and we'll incorrectly
		 * restore the prior base address next time we reschedule
		 * the process.
		 */
		if (fsindex)
			prev->fs = 0;
	}
	if (next->fs)
		wrmsrl(MSR_FS_BASE, next->fs);
	/* Remember the selector that was live when prev was switched out. */
	prev->fsindex = fsindex;

	if (unlikely(gsindex | next->gsindex | prev->gs)) {
		load_gs_index(next->gsindex);

		/* This works (and fails) the same way as fsindex above. */
		if (gsindex)
			prev->gs = 0;
	}
	if (next->gs)
		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
	prev->gsindex = gsindex;

	switch_fpu_finish(next_p, fpu);

	/*
	 * Publish next_p as this CPU's current task.
	 */
	this_cpu_write(current_task, next_p);

	/*
	 * If it were not for PREEMPT_ACTIVE we could guarantee that the
	 * preempt_count of all tasks was equal here and this would not be
	 * needed.
	 */
	task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
	this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);

	/* Reload esp0 and ss1.  This changes current_thread_info(). */
	load_sp0(tss, next);

	/* Per-CPU kernel_stack now points at the top of next's stack page. */
	this_cpu_write(kernel_stack,
		(unsigned long)task_stack_page(next_p) + THREAD_SIZE);

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

#ifdef CONFIG_XEN
	/*
	 * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
	 * current_pt_regs()->flags may not match the current task's
	 * intended IOPL.  We need to switch it manually.
	 */
	if (unlikely(xen_pv_domain() &&
		     prev->iopl != next->iopl))
		xen_set_iopl_mask(next->iopl);
#endif

	if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
		/*
		 * AMD CPUs have a misfeature: SYSRET sets the SS selector but
		 * does not update the cached descriptor.  As a result, if we
		 * do SYSRET while SS is NULL, we'll end up in user mode with
		 * SS apparently equal to __USER_DS but actually unusable.
		 *
		 * The straightforward workaround would be to fix it up just
		 * before SYSRET, but that would slow down the system call
		 * fast paths.  Instead, we ensure that SS is never NULL in
		 * system call context.  We do this by replacing NULL SS
		 * selectors at every context switch.  SYSCALL sets up a valid
		 * SS, so the only way to get NULL is to re-enter the kernel
		 * from CPL 3 through an interrupt.  Since that can't happen
		 * in the same task as a running syscall, we are guaranteed to
		 * context switch between every interrupt vector entry and a
		 * subsequent SYSRET.
		 *
		 * We read SS first because SS reads are much faster than
		 * writes.  Out of caution, we force SS to __KERNEL_DS even if
		 * it previously had a different non-NULL value.
		 */
		unsigned short ss_sel;
		savesegment(ss, ss_sel);
		if (ss_sel != __KERNEL_DS)
			loadsegment(ss, __KERNEL_DS);
	}

	return prev_p;
}
464 
set_personality_64bit(void)465 void set_personality_64bit(void)
466 {
467 	/* inherit personality from parent */
468 
469 	/* Make sure to be in 64bit mode */
470 	clear_thread_flag(TIF_IA32);
471 	clear_thread_flag(TIF_ADDR32);
472 	clear_thread_flag(TIF_X32);
473 
474 	/* Ensure the corresponding mm is not marked. */
475 	if (current->mm)
476 		current->mm->context.ia32_compat = 0;
477 
478 	/* TBD: overwrites user setup. Should have two bits.
479 	   But 64bit processes have always behaved this way,
480 	   so it's not too bad. The main problem is just that
481 	   32bit childs are affected again. */
482 	current->personality &= ~READ_IMPLIES_EXEC;
483 }
484 
/*
 * Switch the current thread to a 32-bit personality.
 *
 * @x32: true selects the x32 flavour (TIF_X32, TS_COMPAT cleared),
 *       false selects ia32 (TIF_IA32, TS_COMPAT set).
 *
 * In both cases TIF_ADDR32 is set and, when the task has an mm, the mm
 * context is tagged with the chosen flag.
 */
void set_personality_ia32(bool x32)
{
	/* The personality itself is inherited from the parent. */

	/* Make sure to be in 32bit mode */
	set_thread_flag(TIF_ADDR32);

	if (!x32) {
		/* ia32: mark the associated mm as containing 32-bit tasks. */
		set_thread_flag(TIF_IA32);
		clear_thread_flag(TIF_X32);
		if (current->mm)
			current->mm->context.ia32_compat = TIF_IA32;
		current->personality |= force_personality32;
		/* Prepare the first "return" to user space */
		current_thread_info()->status |= TS_COMPAT;
		return;
	}

	/* x32: mark the associated mm as containing 32-bit tasks. */
	clear_thread_flag(TIF_IA32);
	set_thread_flag(TIF_X32);
	if (current->mm)
		current->mm->context.ia32_compat = TIF_X32;
	current->personality &= ~READ_IMPLIES_EXEC;
	/*
	 * is_compat_task() uses the presence of the x32 syscall bit flag
	 * to determine compat status, so TS_COMPAT stays clear here.
	 */
	current_thread_info()->status &= ~TS_COMPAT;
}
EXPORT_SYMBOL_GPL(set_personality_ia32);
513 
514 /*
515  * Called from fs/proc with a reference on @p to find the function
516  * which called into schedule(). This needs to be done carefully
517  * because the task might wake up and we might look at a stack
518  * changing under us.
519  */
get_wchan(struct task_struct * p)520 unsigned long get_wchan(struct task_struct *p)
521 {
522 	unsigned long start, bottom, top, sp, fp, ip;
523 	int count = 0;
524 
525 	if (!p || p == current || p->state == TASK_RUNNING)
526 		return 0;
527 
528 	start = (unsigned long)task_stack_page(p);
529 	if (!start)
530 		return 0;
531 
532 	/*
533 	 * Layout of the stack page:
534 	 *
535 	 * ----------- topmax = start + THREAD_SIZE - sizeof(unsigned long)
536 	 * PADDING
537 	 * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING
538 	 * stack
539 	 * ----------- bottom = start + sizeof(thread_info)
540 	 * thread_info
541 	 * ----------- start
542 	 *
543 	 * The tasks stack pointer points at the location where the
544 	 * framepointer is stored. The data on the stack is:
545 	 * ... IP FP ... IP FP
546 	 *
547 	 * We need to read FP and IP, so we need to adjust the upper
548 	 * bound by another unsigned long.
549 	 */
550 	top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;
551 	top -= 2 * sizeof(unsigned long);
552 	bottom = start + sizeof(struct thread_info);
553 
554 	sp = READ_ONCE(p->thread.sp);
555 	if (sp < bottom || sp > top)
556 		return 0;
557 
558 	fp = READ_ONCE(*(unsigned long *)sp);
559 	do {
560 		if (fp < bottom || fp > top)
561 			return 0;
562 		ip = READ_ONCE(*(unsigned long *)(fp + sizeof(unsigned long)));
563 		if (!in_sched_functions(ip))
564 			return ip;
565 		fp = READ_ONCE(*(unsigned long *)fp);
566 	} while (count++ < 16 && p->state != TASK_RUNNING);
567 	return 0;
568 }
569 
do_arch_prctl(struct task_struct * task,int code,unsigned long addr)570 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
571 {
572 	int ret = 0;
573 	int doit = task == current;
574 	int cpu;
575 
576 	switch (code) {
577 	case ARCH_SET_GS:
578 		if (addr >= TASK_SIZE_OF(task))
579 			return -EPERM;
580 		cpu = get_cpu();
581 		/* handle small bases via the GDT because that's faster to
582 		   switch. */
583 		if (addr <= 0xffffffff) {
584 			set_32bit_tls(task, GS_TLS, addr);
585 			if (doit) {
586 				load_TLS(&task->thread, cpu);
587 				load_gs_index(GS_TLS_SEL);
588 			}
589 			task->thread.gsindex = GS_TLS_SEL;
590 			task->thread.gs = 0;
591 		} else {
592 			task->thread.gsindex = 0;
593 			task->thread.gs = addr;
594 			if (doit) {
595 				load_gs_index(0);
596 				ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
597 			}
598 		}
599 		put_cpu();
600 		break;
601 	case ARCH_SET_FS:
602 		/* Not strictly needed for fs, but do it for symmetry
603 		   with gs */
604 		if (addr >= TASK_SIZE_OF(task))
605 			return -EPERM;
606 		cpu = get_cpu();
607 		/* handle small bases via the GDT because that's faster to
608 		   switch. */
609 		if (addr <= 0xffffffff) {
610 			set_32bit_tls(task, FS_TLS, addr);
611 			if (doit) {
612 				load_TLS(&task->thread, cpu);
613 				loadsegment(fs, FS_TLS_SEL);
614 			}
615 			task->thread.fsindex = FS_TLS_SEL;
616 			task->thread.fs = 0;
617 		} else {
618 			task->thread.fsindex = 0;
619 			task->thread.fs = addr;
620 			if (doit) {
621 				/* set the selector to 0 to not confuse
622 				   __switch_to */
623 				loadsegment(fs, 0);
624 				ret = wrmsrl_safe(MSR_FS_BASE, addr);
625 			}
626 		}
627 		put_cpu();
628 		break;
629 	case ARCH_GET_FS: {
630 		unsigned long base;
631 		if (task->thread.fsindex == FS_TLS_SEL)
632 			base = read_32bit_tls(task, FS_TLS);
633 		else if (doit)
634 			rdmsrl(MSR_FS_BASE, base);
635 		else
636 			base = task->thread.fs;
637 		ret = put_user(base, (unsigned long __user *)addr);
638 		break;
639 	}
640 	case ARCH_GET_GS: {
641 		unsigned long base;
642 		unsigned gsindex;
643 		if (task->thread.gsindex == GS_TLS_SEL)
644 			base = read_32bit_tls(task, GS_TLS);
645 		else if (doit) {
646 			savesegment(gs, gsindex);
647 			if (gsindex)
648 				rdmsrl(MSR_KERNEL_GS_BASE, base);
649 			else
650 				base = task->thread.gs;
651 		} else
652 			base = task->thread.gs;
653 		ret = put_user(base, (unsigned long __user *)addr);
654 		break;
655 	}
656 
657 	default:
658 		ret = -EINVAL;
659 		break;
660 	}
661 
662 	return ret;
663 }
664 
/*
 * Apply do_arch_prctl() to the current task with the given @code and
 * @addr, and return its result unchanged.
 */
long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}
669 
/*
 * Return the sp field of the pt_regs frame that task_pt_regs() yields
 * for @task.
 * NOTE(review): presumably the user-mode stack pointer saved on kernel
 * entry -- confirm against task_pt_regs().
 */
unsigned long KSTK_ESP(struct task_struct *task)
{
	return task_pt_regs(task)->sp;
}
674