This source file includes the following definitions:
- __show_regs
- release_thread
- save_base_legacy
- save_fsgs
- save_fsgs_for_kvm
- loadseg
- load_seg_legacy
- x86_fsgsbase_load
- x86_fsgsbase_read_task
- x86_fsbase_read_task
- x86_gsbase_read_task
- x86_fsbase_write_task
- x86_gsbase_write_task
- copy_thread_tls
- start_thread_common
- start_thread
- compat_start_thread
- __switch_to
- set_personality_64bit
- __set_personality_x32
- __set_personality_ia32
- set_personality_ia32
- prctl_map_vdso
- do_arch_prctl_64
- SYSCALL_DEFINE2
- COMPAT_SYSCALL_DEFINE2
- KSTK_ESP
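/*
 * This file handles the architecture-dependent parts of process handling
 * for x86-64: register dumps, thread creation, context switching, the
 * FS/GS base helpers and the arch_prctl() interface.
 */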
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/ptrace.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>
#include <linux/syscalls.h>

#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/fpu/internal.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/syscalls.h>
#include <asm/debugreg.h>
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>
#include <asm/vdso.h>
#include <asm/resctrl_sched.h>
#include <asm/unistd.h>
#include <asm/fsgsbase.h>
#ifdef CONFIG_IA32_EMULATION
/* Not included via unistd.h */
#include <asm/unistd_32_ia32.h>
#endif

#include "process.h"

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, es;

	show_iret_regs(regs);

	if (regs->orig_ax != -1)
		pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
	else
		pr_cont("\n");

	printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	if (mode == SHOW_REGS_SHORT)
		return;

	if (mode == SHOW_REGS_USER) {
		rdmsrl(MSR_FS_BASE, fs);
		rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
		printk(KERN_DEFAULT "FS: %016lx GS: %016lx\n",
		       fs, shadowgs);
		return;
	}

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = __read_cr3();
	cr4 = __read_cr4();

	printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk(KERN_DEFAULT "CS: %04lx DS: %04x ES: %04x CR0: %016lx\n", regs->cs, ds,
	       es, cr0);
	printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
	       cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);

	/* Only print out debug registers if they are in their non-default state. */
	if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
	      (d6 == DR6_RESERVED) && (d7 == 0x400))) {
		printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n",
		       d0, d1, d2);
		printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n",
		       d3, d6, d7);
	}

	if (boot_cpu_has(X86_FEATURE_OSPKE))
		printk(KERN_DEFAULT "PKRU: %08x\n", read_pkru());
}

void release_thread(struct task_struct *dead_task)
{
	WARN_ON(dead_task->mm);
}

enum which_selector {
	FS,
	GS
};
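/*
 * Saves the FS or GS base for an outgoing thread if FSGSBASE extensions are
 * not available.  The goal is to be reasonably fast on non-FSGSBASE systems.
 * It's forcibly inlined because it'll generate better code and this function
 * is hot.
 */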
static __always_inline void save_base_legacy(struct task_struct *prev_p,
					     unsigned short selector,
					     enum which_selector which)
{
	if (likely(selector == 0)) {
		/*
		 * On Intel (without X86_BUG_NULL_SEG), the segment base could
		 * be the pre-existing saved base or it could be zero.  On AMD
		 * (with X86_BUG_NULL_SEG), the segment base could be almost
		 * anything.
		 *
		 * This branch is very hot (it's hit twice on almost every
		 * context switch between 64-bit programs), and avoiding
		 * the RDMSR helps a lot, so we just assume that whatever
		 * value is already saved is correct.  This matches the
		 * behavior of the previous kernel code.
		 */
	} else {
		/*
		 * The selector is nonzero, so the base came from a descriptor
		 * table entry (or, for selectors 1-3, is zero on CPUs without
		 * X86_BUG_NULL_SEG and unpredictable on CPUs with it).  Either
		 * way, the base will be re-derived from the selector on the
		 * next switch-in, so don't preserve a stale cached value.
		 */
		if (which == FS)
			prev_p->thread.fsbase = 0;
		else
			prev_p->thread.gsbase = 0;
	}
}

static __always_inline void save_fsgs(struct task_struct *task)
{
	savesegment(fs, task->thread.fsindex);
	savesegment(gs, task->thread.gsindex);
	save_base_legacy(task, task->thread.fsindex, FS);
	save_base_legacy(task, task->thread.gsindex, GS);
}

#if IS_ENABLED(CONFIG_KVM)
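/*
 * While a process is running, current->thread.fsbase and
 * current->thread.gsbase may not match the corresponding CPU registers
 * (see save_base_legacy()).  KVM wants an efficient way to save and
 * restore FSBASE and GSBASE.  When FSGSBASE extensions are enabled, this
 * will have to use RD{FS,GS}BASE.
 */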
void save_fsgs_for_kvm(void)
{
	save_fsgs(current);
}
EXPORT_SYMBOL_GPL(save_fsgs_for_kvm);
#endif

static __always_inline void loadseg(enum which_selector which,
				    unsigned short sel)
{
	if (which == FS)
		loadsegment(fs, sel);
	else
		load_gs_index(sel);
}

static __always_inline void load_seg_legacy(unsigned short prev_index,
					    unsigned long prev_base,
					    unsigned short next_index,
					    unsigned long next_base,
					    enum which_selector which)
{
	if (likely(next_index <= 3)) {
		/*
		 * The next task is using 64-bit TLS, is not using this
		 * segment at all, or is having fun with arcane CPU features.
		 */
		if (next_base == 0) {
			/*
			 * The next base is zero.  On CPUs with
			 * X86_BUG_NULL_SEG, loading a NULL selector does not
			 * clear the base, so it has to be zeroed explicitly.
			 */
			if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
				loadseg(which, __USER_DS);
				loadseg(which, next_index);
			} else {
				/*
				 * We could try to exhaustively detect cases
				 * under which the segment load can be
				 * skipped, but there's really only one case
				 * that matters for performance: if both the
				 * previous and next states are fully zeroed,
				 * the CPU state already matches and the load
				 * can be skipped entirely.
				 */
				if (likely(prev_index | next_index | prev_base))
					loadseg(which, next_index);
			}
		} else {
			if (prev_index != next_index)
				loadseg(which, next_index);
			wrmsrl(which == FS ? MSR_FS_BASE : MSR_KERNEL_GS_BASE,
			       next_base);
		}
	} else {
		/*
		 * The next task is using a real segment.  Loading the
		 * selector is sufficient.
		 */
		loadseg(which, next_index);
	}
}

static __always_inline void x86_fsgsbase_load(struct thread_struct *prev,
					      struct thread_struct *next)
{
	load_seg_legacy(prev->fsindex, prev->fsbase,
			next->fsindex, next->fsbase, FS);
	load_seg_legacy(prev->gsindex, prev->gsbase,
			next->gsindex, next->gsbase, GS);
}

static unsigned long x86_fsgsbase_read_task(struct task_struct *task,
					    unsigned short selector)
{
	unsigned short idx = selector >> 3;
	unsigned long base;

	if (likely((selector & SEGMENT_TI_MASK) == 0)) {
		if (unlikely(idx >= GDT_ENTRIES))
			return 0;

		/*
		 * There are no user segments in the GDT with nonzero bases
		 * other than the TLS segments.
		 */
		if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
			return 0;

		idx -= GDT_ENTRY_TLS_MIN;
		base = get_desc_base(&task->thread.tls_array[idx]);
	} else {
#ifdef CONFIG_MODIFY_LDT_SYSCALL
		struct ldt_struct *ldt;

		/*
		 * If performance here mattered, we could protect the LDT
		 * with RCU.  This is a slow path, though, so just take the
		 * mutex.
		 */
		mutex_lock(&task->mm->context.lock);
		ldt = task->mm->context.ldt;
		if (unlikely(idx >= ldt->nr_entries))
			base = 0;
		else
			base = get_desc_base(ldt->entries + idx);
		mutex_unlock(&task->mm->context.lock);
#else
		base = 0;
#endif
	}

	return base;
}

unsigned long x86_fsbase_read_task(struct task_struct *task)
{
	unsigned long fsbase;

	if (task == current)
		fsbase = x86_fsbase_read_cpu();
	else if (task->thread.fsindex == 0)
		fsbase = task->thread.fsbase;
	else
		fsbase = x86_fsgsbase_read_task(task, task->thread.fsindex);

	return fsbase;
}

unsigned long x86_gsbase_read_task(struct task_struct *task)
{
	unsigned long gsbase;

	if (task == current)
		gsbase = x86_gsbase_read_cpu_inactive();
	else if (task->thread.gsindex == 0)
		gsbase = task->thread.gsbase;
	else
		gsbase = x86_fsgsbase_read_task(task, task->thread.gsindex);

	return gsbase;
}

void x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase)
{
	WARN_ON_ONCE(task == current);

	task->thread.fsbase = fsbase;
}

void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase)
{
	WARN_ON_ONCE(task == current);

	task->thread.gsbase = gsbase;
}

int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
		    unsigned long arg, struct task_struct *p, unsigned long tls)
{
	int err;
	struct pt_regs *childregs;
	struct fork_frame *fork_frame;
	struct inactive_task_frame *frame;
	struct task_struct *me = current;

	childregs = task_pt_regs(p);
	fork_frame = container_of(childregs, struct fork_frame, regs);
	frame = &fork_frame->frame;

	frame->bp = 0;
	frame->ret_addr = (unsigned long) ret_from_fork;
	p->thread.sp = (unsigned long) fork_frame;
	p->thread.io_bitmap_ptr = NULL;

	savesegment(gs, p->thread.gsindex);
	p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
	savesegment(fs, p->thread.fsindex);
	p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);
	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

	if (unlikely(p->flags & PF_KTHREAD)) {
		/* kernel thread */
		memset(childregs, 0, sizeof(struct pt_regs));
		frame->bx = sp;
		frame->r12 = arg;
		return 0;
	}
	frame->bx = 0;
	*childregs = *current_pt_regs();

	childregs->ax = 0;
	if (sp)
		childregs->sp = sp;

	err = -ENOMEM;
	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
						  IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (in_ia32_syscall())
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)tls, 0);
		else
#endif
			err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}

	return err;
}

static void
start_thread_common(struct pt_regs *regs, unsigned long new_ip,
		    unsigned long new_sp,
		    unsigned int _cs, unsigned int _ss, unsigned int _ds)
{
	WARN_ON_ONCE(regs != current_pt_regs());

	if (static_cpu_has(X86_BUG_NULL_SEG)) {
		/* Loading zero below won't clear the base. */
		loadsegment(fs, __USER_DS);
		load_gs_index(__USER_DS);
	}

	loadsegment(fs, 0);
	loadsegment(es, _ds);
	loadsegment(ds, _ds);
	load_gs_index(0);

	regs->ip = new_ip;
	regs->sp = new_sp;
	regs->cs = _cs;
	regs->ss = _ss;
	regs->flags = X86_EFLAGS_IF;
	force_iret();
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    __USER_CS, __USER_DS, 0);
}
EXPORT_SYMBOL_GPL(start_thread);

#ifdef CONFIG_COMPAT
void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp)
{
	start_thread_common(regs, new_ip, new_sp,
			    test_thread_flag(TIF_X32)
			    ? __USER_CS : __USER32_CS,
			    __USER_DS, __USER_DS);
}
#endif
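/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 * Function graph tracer not supported too.
 */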
__visible __notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	struct fpu *prev_fpu = &prev->fpu;
	struct fpu *next_fpu = &next->fpu;
	int cpu = smp_processor_id();

	WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
		     this_cpu_read(irq_count) != -1);

	if (!test_thread_flag(TIF_NEED_FPU_LOAD))
		switch_fpu_prepare(prev_fpu, cpu);

	/* We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	save_fsgs(prev_p);

	/*
	 * Load TLS before restoring any segments so that segment loads
	 * reference the correct GDT entries.
	 */
	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.  This
	 * must be done after loading TLS entries in the GDT but before
	 * loading segments that might reference them.
	 */
	arch_end_context_switch(next_p);
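	/* Switch DS and ES.
	 *
	 * Reading them only returns the selectors, but writing them (if
	 * nonzero) loads the full descriptor from the GDT or LDT.  The
	 * LDT for next is loaded in switch_mm, and the GDT is loaded
	 * above.
	 *
	 * We therefore need to write new values to the segment
	 * registers on every context switch unless both the new and old
	 * values are zero.
	 *
	 * Note that we don't need to do anything for CS and SS, as
	 * those are saved and restored as part of pt_regs.
	 */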
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	x86_fsgsbase_load(prev, next);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	this_cpu_write(current_task, next_p);
	this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));

	switch_fpu_finish(next_fpu);

	/* Reload sp0. */
	update_task_stack(next_p);

	switch_to_extra(prev_p, next_p);

#ifdef CONFIG_XEN_PV
	/*
	 * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
	 * current_pt_regs()->flags may not match the current task's
	 * intended IOPL.  We need to switch it manually.
	 */
	if (unlikely(static_cpu_has(X86_FEATURE_XENPV) &&
		     prev->iopl != next->iopl))
		xen_set_iopl_mask(next->iopl);
#endif

	if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
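		/*
		 * AMD CPUs have a misfeature: SYSRET sets the SS selector but
		 * does not update the cached descriptor.  As a result, if we
		 * do SYSRET while SS is NULL, we'll end up in user mode with
		 * SS apparently equal to __USER_DS but actually unusable.
		 *
		 * The straightforward workaround would be to fix it up just
		 * before SYSRET, but that would slow down the system call
		 * fast paths.  Instead, we ensure that SS is never NULL in
		 * system call context by replacing NULL SS selectors at
		 * every context switch.  SYSCALL sets up a valid SS, so the
		 * only way to get NULL is to re-enter the kernel from CPL 3
		 * through an interrupt, and that can't happen in the same
		 * task as a running syscall, so we are guaranteed to context
		 * switch between every interrupt vector entry and a
		 * subsequent SYSRET.
		 *
		 * We read SS first because SS reads are much faster than
		 * writes.  Out of caution, we force SS to __KERNEL_DS even
		 * if it's already __KERNEL_DS.
		 */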
		unsigned short ss_sel;
		savesegment(ss, ss_sel);
		if (ss_sel != __KERNEL_DS)
			loadsegment(ss, __KERNEL_DS);
	}

	/* Load the Intel cache allocation PQR MSR. */
	resctrl_sched_in();

	return prev_p;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);
	clear_thread_flag(TIF_ADDR32);
	clear_thread_flag(TIF_X32);
	/* Pretend that this comes from a 64bit execve */
	task_pt_regs(current)->orig_ax = __NR_execve;
	current_thread_info()->status &= ~TS_COMPAT;

	/* Ensure the corresponding mm is not marked. */
	if (current->mm)
		current->mm->context.ia32_compat = 0;

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just
	   that 32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

static void __set_personality_x32(void)
{
#ifdef CONFIG_X86_X32
	clear_thread_flag(TIF_IA32);
	set_thread_flag(TIF_X32);
	if (current->mm)
		current->mm->context.ia32_compat = TIF_X32;
	current->personality &= ~READ_IMPLIES_EXEC;

	/*
	 * in_compat_syscall() uses the presence of the x32 syscall bit
	 * flag to determine compat status.  The x86 mmap() code relies on
	 * the syscall bitness so set x32 syscall bit right here to make
	 * in_compat_syscall() work during exec().
	 *
	 * Pretend to come from an x32 execve.
	 */
	task_pt_regs(current)->orig_ax = __NR_x32_execve | __X32_SYSCALL_BIT;
	current_thread_info()->status &= ~TS_COMPAT;
#endif
}

static void __set_personality_ia32(void)
{
#ifdef CONFIG_IA32_EMULATION
	set_thread_flag(TIF_IA32);
	clear_thread_flag(TIF_X32);
	if (current->mm)
		current->mm->context.ia32_compat = TIF_IA32;
	current->personality |= force_personality32;

	task_pt_regs(current)->orig_ax = __NR_ia32_execve;
	current_thread_info()->status |= TS_COMPAT;
#endif
}

void set_personality_ia32(bool x32)
{
	/* Make sure to be in 32bit mode */
	set_thread_flag(TIF_ADDR32);

	if (x32)
		__set_personality_x32();
	else
		__set_personality_ia32();
}
EXPORT_SYMBOL_GPL(set_personality_ia32);

#ifdef CONFIG_CHECKPOINT_RESTORE
static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
{
	int ret;

	ret = map_vdso_once(image, addr);
	if (ret)
		return ret;

	return (long)image->size;
}
#endif

long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
{
	int ret = 0;

	switch (option) {
	case ARCH_SET_GS: {
		if (unlikely(arg2 >= TASK_SIZE_MAX))
			return -EPERM;

		preempt_disable();
		/*
		 * ARCH_SET_GS has always overwritten the index
		 * and the base. Zero is the most sensible value
		 * to put in the index, and is the only value that
		 * makes any sense if FSGSBASE is unset.
		 */
		if (task == current) {
			loadseg(GS, 0);
			x86_gsbase_write_cpu_inactive(arg2);

			/*
			 * On non-FSGSBASE systems, save_base_legacy() expects
			 * that we also fill in thread.gsbase.
			 */
			task->thread.gsbase = arg2;

		} else {
			task->thread.gsindex = 0;
			x86_gsbase_write_task(task, arg2);
		}
		preempt_enable();
		break;
	}
	case ARCH_SET_FS: {
		/*
		 * Not strictly needed for %fs, but do it for symmetry
		 * with %gs.
		 */
		if (unlikely(arg2 >= TASK_SIZE_MAX))
			return -EPERM;

		preempt_disable();
		/*
		 * Set the selector to 0 for the same reason
		 * as %gs above.
		 */
		if (task == current) {
			loadseg(FS, 0);
			x86_fsbase_write_cpu(arg2);

			/*
			 * On non-FSGSBASE systems, save_base_legacy() expects
			 * that we also fill in thread.fsbase.
			 */
			task->thread.fsbase = arg2;
		} else {
			task->thread.fsindex = 0;
			x86_fsbase_write_task(task, arg2);
		}
		preempt_enable();
		break;
	}
	case ARCH_GET_FS: {
		unsigned long base = x86_fsbase_read_task(task);

		ret = put_user(base, (unsigned long __user *)arg2);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base = x86_gsbase_read_task(task);

		ret = put_user(base, (unsigned long __user *)arg2);
		break;
	}

#ifdef CONFIG_CHECKPOINT_RESTORE
# ifdef CONFIG_X86_X32_ABI
	case ARCH_MAP_VDSO_X32:
		return prctl_map_vdso(&vdso_image_x32, arg2);
# endif
# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
	case ARCH_MAP_VDSO_32:
		return prctl_map_vdso(&vdso_image_32, arg2);
# endif
	case ARCH_MAP_VDSO_64:
		return prctl_map_vdso(&vdso_image_64, arg2);
#endif

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
	long ret;

	ret = do_arch_prctl_64(current, option, arg2);
	if (ret == -EINVAL)
		ret = do_arch_prctl_common(current, option, arg2);

	return ret;
}

#ifdef CONFIG_IA32_EMULATION
COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
	return do_arch_prctl_common(current, option, arg2);
}
#endif

unsigned long KSTK_ESP(struct task_struct *task)
{
	return task_pt_regs(task)->sp;
}
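
/*
 * Illustrative usage sketch (not part of this file): the arch_prctl()
 * entry point above is what a 64-bit user program reaches via
 * syscall(SYS_arch_prctl, ...).  A minimal example of reading the FS
 * base and setting the GS base from userspace might look like this:
 *
 *	#include <asm/prctl.h>		// ARCH_GET_FS, ARCH_SET_GS, ...
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		unsigned long fsbase;
 *
 *		// ARCH_GET_FS stores the FS base through the pointer.
 *		if (syscall(SYS_arch_prctl, ARCH_GET_FS, &fsbase) == 0)
 *			printf("FS base: %#lx\n", fsbase);
 *
 *		// ARCH_SET_GS rejects bases >= TASK_SIZE_MAX with -EPERM.
 *		if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0x1000UL) != 0)
 *			perror("arch_prctl(ARCH_SET_GS)");
 *		return 0;
 *	}
 */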