1/* By Ross Biro 1/23/92 */ 2/* 3 * Pentium III FXSR, SSE support 4 * Gareth Hughes <gareth@valinux.com>, May 2000 5 */ 6 7#include <linux/kernel.h> 8#include <linux/sched.h> 9#include <linux/mm.h> 10#include <linux/smp.h> 11#include <linux/errno.h> 12#include <linux/slab.h> 13#include <linux/ptrace.h> 14#include <linux/regset.h> 15#include <linux/tracehook.h> 16#include <linux/user.h> 17#include <linux/elf.h> 18#include <linux/security.h> 19#include <linux/audit.h> 20#include <linux/seccomp.h> 21#include <linux/signal.h> 22#include <linux/perf_event.h> 23#include <linux/hw_breakpoint.h> 24#include <linux/rcupdate.h> 25#include <linux/export.h> 26#include <linux/context_tracking.h> 27 28#include <asm/uaccess.h> 29#include <asm/pgtable.h> 30#include <asm/processor.h> 31#include <asm/i387.h> 32#include <asm/fpu-internal.h> 33#include <asm/debugreg.h> 34#include <asm/ldt.h> 35#include <asm/desc.h> 36#include <asm/prctl.h> 37#include <asm/proto.h> 38#include <asm/hw_breakpoint.h> 39#include <asm/traps.h> 40 41#include "tls.h" 42 43#define CREATE_TRACE_POINTS 44#include <trace/events/syscalls.h> 45 46enum x86_regset { 47 REGSET_GENERAL, 48 REGSET_FP, 49 REGSET_XFP, 50 REGSET_IOPERM64 = REGSET_XFP, 51 REGSET_XSTATE, 52 REGSET_TLS, 53 REGSET_IOPERM32, 54}; 55 56struct pt_regs_offset { 57 const char *name; 58 int offset; 59}; 60 61#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} 62#define REG_OFFSET_END {.name = NULL, .offset = 0} 63 64static const struct pt_regs_offset regoffset_table[] = { 65#ifdef CONFIG_X86_64 66 REG_OFFSET_NAME(r15), 67 REG_OFFSET_NAME(r14), 68 REG_OFFSET_NAME(r13), 69 REG_OFFSET_NAME(r12), 70 REG_OFFSET_NAME(r11), 71 REG_OFFSET_NAME(r10), 72 REG_OFFSET_NAME(r9), 73 REG_OFFSET_NAME(r8), 74#endif 75 REG_OFFSET_NAME(bx), 76 REG_OFFSET_NAME(cx), 77 REG_OFFSET_NAME(dx), 78 REG_OFFSET_NAME(si), 79 REG_OFFSET_NAME(di), 80 REG_OFFSET_NAME(bp), 81 REG_OFFSET_NAME(ax), 82#ifdef CONFIG_X86_32 83 REG_OFFSET_NAME(ds), 84 
REG_OFFSET_NAME(es), 85 REG_OFFSET_NAME(fs), 86 REG_OFFSET_NAME(gs), 87#endif 88 REG_OFFSET_NAME(orig_ax), 89 REG_OFFSET_NAME(ip), 90 REG_OFFSET_NAME(cs), 91 REG_OFFSET_NAME(flags), 92 REG_OFFSET_NAME(sp), 93 REG_OFFSET_NAME(ss), 94 REG_OFFSET_END, 95}; 96 97/** 98 * regs_query_register_offset() - query register offset from its name 99 * @name: the name of a register 100 * 101 * regs_query_register_offset() returns the offset of a register in struct 102 * pt_regs from its name. If the name is invalid, this returns -EINVAL; 103 */ 104int regs_query_register_offset(const char *name) 105{ 106 const struct pt_regs_offset *roff; 107 for (roff = regoffset_table; roff->name != NULL; roff++) 108 if (!strcmp(roff->name, name)) 109 return roff->offset; 110 return -EINVAL; 111} 112 113/** 114 * regs_query_register_name() - query register name from its offset 115 * @offset: the offset of a register in struct pt_regs. 116 * 117 * regs_query_register_name() returns the name of a register from its 118 * offset in struct pt_regs. If the @offset is invalid, this returns NULL; 119 */ 120const char *regs_query_register_name(unsigned int offset) 121{ 122 const struct pt_regs_offset *roff; 123 for (roff = regoffset_table; roff->name != NULL; roff++) 124 if (roff->offset == offset) 125 return roff->name; 126 return NULL; 127} 128 129static const int arg_offs_table[] = { 130#ifdef CONFIG_X86_32 131 [0] = offsetof(struct pt_regs, ax), 132 [1] = offsetof(struct pt_regs, dx), 133 [2] = offsetof(struct pt_regs, cx) 134#else /* CONFIG_X86_64 */ 135 [0] = offsetof(struct pt_regs, di), 136 [1] = offsetof(struct pt_regs, si), 137 [2] = offsetof(struct pt_regs, dx), 138 [3] = offsetof(struct pt_regs, cx), 139 [4] = offsetof(struct pt_regs, r8), 140 [5] = offsetof(struct pt_regs, r9) 141#endif 142}; 143 144/* 145 * does not yet catch signals sent when the child dies. 146 * in exit.c or in signal.c. 147 */ 148 149/* 150 * Determines which flags the user has access to [1 = access, 0 = no access]. 
151 */ 152#define FLAG_MASK_32 ((unsigned long) \ 153 (X86_EFLAGS_CF | X86_EFLAGS_PF | \ 154 X86_EFLAGS_AF | X86_EFLAGS_ZF | \ 155 X86_EFLAGS_SF | X86_EFLAGS_TF | \ 156 X86_EFLAGS_DF | X86_EFLAGS_OF | \ 157 X86_EFLAGS_RF | X86_EFLAGS_AC)) 158 159/* 160 * Determines whether a value may be installed in a segment register. 161 */ 162static inline bool invalid_selector(u16 value) 163{ 164 return unlikely(value != 0 && (value & SEGMENT_RPL_MASK) != USER_RPL); 165} 166 167#ifdef CONFIG_X86_32 168 169#define FLAG_MASK FLAG_MASK_32 170 171/* 172 * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode 173 * when it traps. The previous stack will be directly underneath the saved 174 * registers, and 'sp/ss' won't even have been saved. Thus the '®s->sp'. 175 * 176 * Now, if the stack is empty, '®s->sp' is out of range. In this 177 * case we try to take the previous stack. To always return a non-null 178 * stack pointer we fall back to regs as stack if no previous stack 179 * exists. 180 * 181 * This is valid only for kernel mode traps. 182 */ 183unsigned long kernel_stack_pointer(struct pt_regs *regs) 184{ 185 unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1); 186 unsigned long sp = (unsigned long)®s->sp; 187 u32 *prev_esp; 188 189 if (context == (sp & ~(THREAD_SIZE - 1))) 190 return sp; 191 192 prev_esp = (u32 *)(context); 193 if (prev_esp) 194 return (unsigned long)prev_esp; 195 196 return (unsigned long)regs; 197} 198EXPORT_SYMBOL_GPL(kernel_stack_pointer); 199 200static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) 201{ 202 BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); 203 return ®s->bx + (regno >> 2); 204} 205 206static u16 get_segment_reg(struct task_struct *task, unsigned long offset) 207{ 208 /* 209 * Returning the value truncates it to 16 bits. 
210 */ 211 unsigned int retval; 212 if (offset != offsetof(struct user_regs_struct, gs)) 213 retval = *pt_regs_access(task_pt_regs(task), offset); 214 else { 215 if (task == current) 216 retval = get_user_gs(task_pt_regs(task)); 217 else 218 retval = task_user_gs(task); 219 } 220 return retval; 221} 222 223static int set_segment_reg(struct task_struct *task, 224 unsigned long offset, u16 value) 225{ 226 /* 227 * The value argument was already truncated to 16 bits. 228 */ 229 if (invalid_selector(value)) 230 return -EIO; 231 232 /* 233 * For %cs and %ss we cannot permit a null selector. 234 * We can permit a bogus selector as long as it has USER_RPL. 235 * Null selectors are fine for other segment registers, but 236 * we will never get back to user mode with invalid %cs or %ss 237 * and will take the trap in iret instead. Much code relies 238 * on user_mode() to distinguish a user trap frame (which can 239 * safely use invalid selectors) from a kernel trap frame. 240 */ 241 switch (offset) { 242 case offsetof(struct user_regs_struct, cs): 243 case offsetof(struct user_regs_struct, ss): 244 if (unlikely(value == 0)) 245 return -EIO; 246 247 default: 248 *pt_regs_access(task_pt_regs(task), offset) = value; 249 break; 250 251 case offsetof(struct user_regs_struct, gs): 252 if (task == current) 253 set_user_gs(task_pt_regs(task), value); 254 else 255 task_user_gs(task) = value; 256 } 257 258 return 0; 259} 260 261#else /* CONFIG_X86_64 */ 262 263#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) 264 265static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long offset) 266{ 267 BUILD_BUG_ON(offsetof(struct pt_regs, r15) != 0); 268 return ®s->r15 + (offset / sizeof(regs->r15)); 269} 270 271static u16 get_segment_reg(struct task_struct *task, unsigned long offset) 272{ 273 /* 274 * Returning the value truncates it to 16 bits. 
275 */ 276 unsigned int seg; 277 278 switch (offset) { 279 case offsetof(struct user_regs_struct, fs): 280 if (task == current) { 281 /* Older gas can't assemble movq %?s,%r?? */ 282 asm("movl %%fs,%0" : "=r" (seg)); 283 return seg; 284 } 285 return task->thread.fsindex; 286 case offsetof(struct user_regs_struct, gs): 287 if (task == current) { 288 asm("movl %%gs,%0" : "=r" (seg)); 289 return seg; 290 } 291 return task->thread.gsindex; 292 case offsetof(struct user_regs_struct, ds): 293 if (task == current) { 294 asm("movl %%ds,%0" : "=r" (seg)); 295 return seg; 296 } 297 return task->thread.ds; 298 case offsetof(struct user_regs_struct, es): 299 if (task == current) { 300 asm("movl %%es,%0" : "=r" (seg)); 301 return seg; 302 } 303 return task->thread.es; 304 305 case offsetof(struct user_regs_struct, cs): 306 case offsetof(struct user_regs_struct, ss): 307 break; 308 } 309 return *pt_regs_access(task_pt_regs(task), offset); 310} 311 312static int set_segment_reg(struct task_struct *task, 313 unsigned long offset, u16 value) 314{ 315 /* 316 * The value argument was already truncated to 16 bits. 317 */ 318 if (invalid_selector(value)) 319 return -EIO; 320 321 switch (offset) { 322 case offsetof(struct user_regs_struct,fs): 323 /* 324 * If this is setting fs as for normal 64-bit use but 325 * setting fs_base has implicitly changed it, leave it. 326 */ 327 if ((value == FS_TLS_SEL && task->thread.fsindex == 0 && 328 task->thread.fs != 0) || 329 (value == 0 && task->thread.fsindex == FS_TLS_SEL && 330 task->thread.fs == 0)) 331 break; 332 task->thread.fsindex = value; 333 if (task == current) 334 loadsegment(fs, task->thread.fsindex); 335 break; 336 case offsetof(struct user_regs_struct,gs): 337 /* 338 * If this is setting gs as for normal 64-bit use but 339 * setting gs_base has implicitly changed it, leave it. 
340 */ 341 if ((value == GS_TLS_SEL && task->thread.gsindex == 0 && 342 task->thread.gs != 0) || 343 (value == 0 && task->thread.gsindex == GS_TLS_SEL && 344 task->thread.gs == 0)) 345 break; 346 task->thread.gsindex = value; 347 if (task == current) 348 load_gs_index(task->thread.gsindex); 349 break; 350 case offsetof(struct user_regs_struct,ds): 351 task->thread.ds = value; 352 if (task == current) 353 loadsegment(ds, task->thread.ds); 354 break; 355 case offsetof(struct user_regs_struct,es): 356 task->thread.es = value; 357 if (task == current) 358 loadsegment(es, task->thread.es); 359 break; 360 361 /* 362 * Can't actually change these in 64-bit mode. 363 */ 364 case offsetof(struct user_regs_struct,cs): 365 if (unlikely(value == 0)) 366 return -EIO; 367 task_pt_regs(task)->cs = value; 368 break; 369 case offsetof(struct user_regs_struct,ss): 370 if (unlikely(value == 0)) 371 return -EIO; 372 task_pt_regs(task)->ss = value; 373 break; 374 } 375 376 return 0; 377} 378 379#endif /* CONFIG_X86_32 */ 380 381static unsigned long get_flags(struct task_struct *task) 382{ 383 unsigned long retval = task_pt_regs(task)->flags; 384 385 /* 386 * If the debugger set TF, hide it from the readout. 387 */ 388 if (test_tsk_thread_flag(task, TIF_FORCED_TF)) 389 retval &= ~X86_EFLAGS_TF; 390 391 return retval; 392} 393 394static int set_flags(struct task_struct *task, unsigned long value) 395{ 396 struct pt_regs *regs = task_pt_regs(task); 397 398 /* 399 * If the user value contains TF, mark that 400 * it was not "us" (the debugger) that set it. 401 * If not, make sure it stays set if we had. 
402 */ 403 if (value & X86_EFLAGS_TF) 404 clear_tsk_thread_flag(task, TIF_FORCED_TF); 405 else if (test_tsk_thread_flag(task, TIF_FORCED_TF)) 406 value |= X86_EFLAGS_TF; 407 408 regs->flags = (regs->flags & ~FLAG_MASK) | (value & FLAG_MASK); 409 410 return 0; 411} 412 413static int putreg(struct task_struct *child, 414 unsigned long offset, unsigned long value) 415{ 416 switch (offset) { 417 case offsetof(struct user_regs_struct, cs): 418 case offsetof(struct user_regs_struct, ds): 419 case offsetof(struct user_regs_struct, es): 420 case offsetof(struct user_regs_struct, fs): 421 case offsetof(struct user_regs_struct, gs): 422 case offsetof(struct user_regs_struct, ss): 423 return set_segment_reg(child, offset, value); 424 425 case offsetof(struct user_regs_struct, flags): 426 return set_flags(child, value); 427 428#ifdef CONFIG_X86_64 429 case offsetof(struct user_regs_struct,fs_base): 430 if (value >= TASK_SIZE_OF(child)) 431 return -EIO; 432 /* 433 * When changing the segment base, use do_arch_prctl 434 * to set either thread.fs or thread.fsindex and the 435 * corresponding GDT slot. 436 */ 437 if (child->thread.fs != value) 438 return do_arch_prctl(child, ARCH_SET_FS, value); 439 return 0; 440 case offsetof(struct user_regs_struct,gs_base): 441 /* 442 * Exactly the same here as the %fs handling above. 
443 */ 444 if (value >= TASK_SIZE_OF(child)) 445 return -EIO; 446 if (child->thread.gs != value) 447 return do_arch_prctl(child, ARCH_SET_GS, value); 448 return 0; 449#endif 450 } 451 452 *pt_regs_access(task_pt_regs(child), offset) = value; 453 return 0; 454} 455 456static unsigned long getreg(struct task_struct *task, unsigned long offset) 457{ 458 switch (offset) { 459 case offsetof(struct user_regs_struct, cs): 460 case offsetof(struct user_regs_struct, ds): 461 case offsetof(struct user_regs_struct, es): 462 case offsetof(struct user_regs_struct, fs): 463 case offsetof(struct user_regs_struct, gs): 464 case offsetof(struct user_regs_struct, ss): 465 return get_segment_reg(task, offset); 466 467 case offsetof(struct user_regs_struct, flags): 468 return get_flags(task); 469 470#ifdef CONFIG_X86_64 471 case offsetof(struct user_regs_struct, fs_base): { 472 /* 473 * do_arch_prctl may have used a GDT slot instead of 474 * the MSR. To userland, it appears the same either 475 * way, except the %fs segment selector might not be 0. 476 */ 477 unsigned int seg = task->thread.fsindex; 478 if (task->thread.fs != 0) 479 return task->thread.fs; 480 if (task == current) 481 asm("movl %%fs,%0" : "=r" (seg)); 482 if (seg != FS_TLS_SEL) 483 return 0; 484 return get_desc_base(&task->thread.tls_array[FS_TLS]); 485 } 486 case offsetof(struct user_regs_struct, gs_base): { 487 /* 488 * Exactly the same here as the %fs handling above. 
489 */ 490 unsigned int seg = task->thread.gsindex; 491 if (task->thread.gs != 0) 492 return task->thread.gs; 493 if (task == current) 494 asm("movl %%gs,%0" : "=r" (seg)); 495 if (seg != GS_TLS_SEL) 496 return 0; 497 return get_desc_base(&task->thread.tls_array[GS_TLS]); 498 } 499#endif 500 } 501 502 return *pt_regs_access(task_pt_regs(task), offset); 503} 504 505static int genregs_get(struct task_struct *target, 506 const struct user_regset *regset, 507 unsigned int pos, unsigned int count, 508 void *kbuf, void __user *ubuf) 509{ 510 if (kbuf) { 511 unsigned long *k = kbuf; 512 while (count >= sizeof(*k)) { 513 *k++ = getreg(target, pos); 514 count -= sizeof(*k); 515 pos += sizeof(*k); 516 } 517 } else { 518 unsigned long __user *u = ubuf; 519 while (count >= sizeof(*u)) { 520 if (__put_user(getreg(target, pos), u++)) 521 return -EFAULT; 522 count -= sizeof(*u); 523 pos += sizeof(*u); 524 } 525 } 526 527 return 0; 528} 529 530static int genregs_set(struct task_struct *target, 531 const struct user_regset *regset, 532 unsigned int pos, unsigned int count, 533 const void *kbuf, const void __user *ubuf) 534{ 535 int ret = 0; 536 if (kbuf) { 537 const unsigned long *k = kbuf; 538 while (count >= sizeof(*k) && !ret) { 539 ret = putreg(target, pos, *k++); 540 count -= sizeof(*k); 541 pos += sizeof(*k); 542 } 543 } else { 544 const unsigned long __user *u = ubuf; 545 while (count >= sizeof(*u) && !ret) { 546 unsigned long word; 547 ret = __get_user(word, u++); 548 if (ret) 549 break; 550 ret = putreg(target, pos, word); 551 count -= sizeof(*u); 552 pos += sizeof(*u); 553 } 554 } 555 return ret; 556} 557 558static void ptrace_triggered(struct perf_event *bp, 559 struct perf_sample_data *data, 560 struct pt_regs *regs) 561{ 562 int i; 563 struct thread_struct *thread = &(current->thread); 564 565 /* 566 * Store in the virtual DR6 register the fact that the breakpoint 567 * was hit so the thread's debugger will see it. 
568 */ 569 for (i = 0; i < HBP_NUM; i++) { 570 if (thread->ptrace_bps[i] == bp) 571 break; 572 } 573 574 thread->debugreg6 |= (DR_TRAP0 << i); 575} 576 577/* 578 * Walk through every ptrace breakpoints for this thread and 579 * build the dr7 value on top of their attributes. 580 * 581 */ 582static unsigned long ptrace_get_dr7(struct perf_event *bp[]) 583{ 584 int i; 585 int dr7 = 0; 586 struct arch_hw_breakpoint *info; 587 588 for (i = 0; i < HBP_NUM; i++) { 589 if (bp[i] && !bp[i]->attr.disabled) { 590 info = counter_arch_bp(bp[i]); 591 dr7 |= encode_dr7(i, info->len, info->type); 592 } 593 } 594 595 return dr7; 596} 597 598static int ptrace_fill_bp_fields(struct perf_event_attr *attr, 599 int len, int type, bool disabled) 600{ 601 int err, bp_len, bp_type; 602 603 err = arch_bp_generic_fields(len, type, &bp_len, &bp_type); 604 if (!err) { 605 attr->bp_len = bp_len; 606 attr->bp_type = bp_type; 607 attr->disabled = disabled; 608 } 609 610 return err; 611} 612 613static struct perf_event * 614ptrace_register_breakpoint(struct task_struct *tsk, int len, int type, 615 unsigned long addr, bool disabled) 616{ 617 struct perf_event_attr attr; 618 int err; 619 620 ptrace_breakpoint_init(&attr); 621 attr.bp_addr = addr; 622 623 err = ptrace_fill_bp_fields(&attr, len, type, disabled); 624 if (err) 625 return ERR_PTR(err); 626 627 return register_user_hw_breakpoint(&attr, ptrace_triggered, 628 NULL, tsk); 629} 630 631static int ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, 632 int disabled) 633{ 634 struct perf_event_attr attr = bp->attr; 635 int err; 636 637 err = ptrace_fill_bp_fields(&attr, len, type, disabled); 638 if (err) 639 return err; 640 641 return modify_user_hw_breakpoint(bp, &attr); 642} 643 644/* 645 * Handle ptrace writes to debug register 7. 
646 */ 647static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) 648{ 649 struct thread_struct *thread = &tsk->thread; 650 unsigned long old_dr7; 651 bool second_pass = false; 652 int i, rc, ret = 0; 653 654 data &= ~DR_CONTROL_RESERVED; 655 old_dr7 = ptrace_get_dr7(thread->ptrace_bps); 656 657restore: 658 rc = 0; 659 for (i = 0; i < HBP_NUM; i++) { 660 unsigned len, type; 661 bool disabled = !decode_dr7(data, i, &len, &type); 662 struct perf_event *bp = thread->ptrace_bps[i]; 663 664 if (!bp) { 665 if (disabled) 666 continue; 667 668 bp = ptrace_register_breakpoint(tsk, 669 len, type, 0, disabled); 670 if (IS_ERR(bp)) { 671 rc = PTR_ERR(bp); 672 break; 673 } 674 675 thread->ptrace_bps[i] = bp; 676 continue; 677 } 678 679 rc = ptrace_modify_breakpoint(bp, len, type, disabled); 680 if (rc) 681 break; 682 } 683 684 /* Restore if the first pass failed, second_pass shouldn't fail. */ 685 if (rc && !WARN_ON(second_pass)) { 686 ret = rc; 687 data = old_dr7; 688 second_pass = true; 689 goto restore; 690 } 691 692 return ret; 693} 694 695/* 696 * Handle PTRACE_PEEKUSR calls for the debug register area. 697 */ 698static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) 699{ 700 struct thread_struct *thread = &tsk->thread; 701 unsigned long val = 0; 702 703 if (n < HBP_NUM) { 704 struct perf_event *bp = thread->ptrace_bps[n]; 705 706 if (bp) 707 val = bp->hw.info.address; 708 } else if (n == 6) { 709 val = thread->debugreg6; 710 } else if (n == 7) { 711 val = thread->ptrace_dr7; 712 } 713 return val; 714} 715 716static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, 717 unsigned long addr) 718{ 719 struct thread_struct *t = &tsk->thread; 720 struct perf_event *bp = t->ptrace_bps[nr]; 721 int err = 0; 722 723 if (!bp) { 724 /* 725 * Put stub len and type to create an inactive but correct bp. 726 * 727 * CHECKME: the previous code returned -EIO if the addr wasn't 728 * a valid task virtual addr. 
The new one will return -EINVAL in 729 * this case. 730 * -EINVAL may be what we want for in-kernel breakpoints users, 731 * but -EIO looks better for ptrace, since we refuse a register 732 * writing for the user. And anyway this is the previous 733 * behaviour. 734 */ 735 bp = ptrace_register_breakpoint(tsk, 736 X86_BREAKPOINT_LEN_1, X86_BREAKPOINT_WRITE, 737 addr, true); 738 if (IS_ERR(bp)) 739 err = PTR_ERR(bp); 740 else 741 t->ptrace_bps[nr] = bp; 742 } else { 743 struct perf_event_attr attr = bp->attr; 744 745 attr.bp_addr = addr; 746 err = modify_user_hw_breakpoint(bp, &attr); 747 } 748 749 return err; 750} 751 752/* 753 * Handle PTRACE_POKEUSR calls for the debug register area. 754 */ 755static int ptrace_set_debugreg(struct task_struct *tsk, int n, 756 unsigned long val) 757{ 758 struct thread_struct *thread = &tsk->thread; 759 /* There are no DR4 or DR5 registers */ 760 int rc = -EIO; 761 762 if (n < HBP_NUM) { 763 rc = ptrace_set_breakpoint_addr(tsk, n, val); 764 } else if (n == 6) { 765 thread->debugreg6 = val; 766 rc = 0; 767 } else if (n == 7) { 768 rc = ptrace_write_dr7(tsk, val); 769 if (!rc) 770 thread->ptrace_dr7 = val; 771 } 772 return rc; 773} 774 775/* 776 * These access the current or another (stopped) task's io permission 777 * bitmap for debugging or core dump. 778 */ 779static int ioperm_active(struct task_struct *target, 780 const struct user_regset *regset) 781{ 782 return target->thread.io_bitmap_max / regset->size; 783} 784 785static int ioperm_get(struct task_struct *target, 786 const struct user_regset *regset, 787 unsigned int pos, unsigned int count, 788 void *kbuf, void __user *ubuf) 789{ 790 if (!target->thread.io_bitmap_ptr) 791 return -ENXIO; 792 793 return user_regset_copyout(&pos, &count, &kbuf, &ubuf, 794 target->thread.io_bitmap_ptr, 795 0, IO_BITMAP_BYTES); 796} 797 798/* 799 * Called by kernel/ptrace.c when detaching.. 800 * 801 * Make sure the single step bit is not set. 
802 */ 803void ptrace_disable(struct task_struct *child) 804{ 805 user_disable_single_step(child); 806#ifdef TIF_SYSCALL_EMU 807 clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); 808#endif 809} 810 811#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 812static const struct user_regset_view user_x86_32_view; /* Initialized below. */ 813#endif 814 815long arch_ptrace(struct task_struct *child, long request, 816 unsigned long addr, unsigned long data) 817{ 818 int ret; 819 unsigned long __user *datap = (unsigned long __user *)data; 820 821 switch (request) { 822 /* read the word at location addr in the USER area. */ 823 case PTRACE_PEEKUSR: { 824 unsigned long tmp; 825 826 ret = -EIO; 827 if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user)) 828 break; 829 830 tmp = 0; /* Default return condition */ 831 if (addr < sizeof(struct user_regs_struct)) 832 tmp = getreg(child, addr); 833 else if (addr >= offsetof(struct user, u_debugreg[0]) && 834 addr <= offsetof(struct user, u_debugreg[7])) { 835 addr -= offsetof(struct user, u_debugreg[0]); 836 tmp = ptrace_get_debugreg(child, addr / sizeof(data)); 837 } 838 ret = put_user(tmp, datap); 839 break; 840 } 841 842 case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ 843 ret = -EIO; 844 if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user)) 845 break; 846 847 if (addr < sizeof(struct user_regs_struct)) 848 ret = putreg(child, addr, data); 849 else if (addr >= offsetof(struct user, u_debugreg[0]) && 850 addr <= offsetof(struct user, u_debugreg[7])) { 851 addr -= offsetof(struct user, u_debugreg[0]); 852 ret = ptrace_set_debugreg(child, 853 addr / sizeof(data), data); 854 } 855 break; 856 857 case PTRACE_GETREGS: /* Get all gp regs from the child. */ 858 return copy_regset_to_user(child, 859 task_user_regset_view(current), 860 REGSET_GENERAL, 861 0, sizeof(struct user_regs_struct), 862 datap); 863 864 case PTRACE_SETREGS: /* Set all gp regs in the child. 
*/ 865 return copy_regset_from_user(child, 866 task_user_regset_view(current), 867 REGSET_GENERAL, 868 0, sizeof(struct user_regs_struct), 869 datap); 870 871 case PTRACE_GETFPREGS: /* Get the child FPU state. */ 872 return copy_regset_to_user(child, 873 task_user_regset_view(current), 874 REGSET_FP, 875 0, sizeof(struct user_i387_struct), 876 datap); 877 878 case PTRACE_SETFPREGS: /* Set the child FPU state. */ 879 return copy_regset_from_user(child, 880 task_user_regset_view(current), 881 REGSET_FP, 882 0, sizeof(struct user_i387_struct), 883 datap); 884 885#ifdef CONFIG_X86_32 886 case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */ 887 return copy_regset_to_user(child, &user_x86_32_view, 888 REGSET_XFP, 889 0, sizeof(struct user_fxsr_struct), 890 datap) ? -EIO : 0; 891 892 case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */ 893 return copy_regset_from_user(child, &user_x86_32_view, 894 REGSET_XFP, 895 0, sizeof(struct user_fxsr_struct), 896 datap) ? -EIO : 0; 897#endif 898 899#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION 900 case PTRACE_GET_THREAD_AREA: 901 if ((int) addr < 0) 902 return -EIO; 903 ret = do_get_thread_area(child, addr, 904 (struct user_desc __user *)data); 905 break; 906 907 case PTRACE_SET_THREAD_AREA: 908 if ((int) addr < 0) 909 return -EIO; 910 ret = do_set_thread_area(child, addr, 911 (struct user_desc __user *)data, 0); 912 break; 913#endif 914 915#ifdef CONFIG_X86_64 916 /* normal 64bit interface to access TLS data. 917 Works just like arch_prctl, except that the arguments 918 are reversed. 
*/ 919 case PTRACE_ARCH_PRCTL: 920 ret = do_arch_prctl(child, data, addr); 921 break; 922#endif 923 924 default: 925 ret = ptrace_request(child, request, addr, data); 926 break; 927 } 928 929 return ret; 930} 931 932#ifdef CONFIG_IA32_EMULATION 933 934#include <linux/compat.h> 935#include <linux/syscalls.h> 936#include <asm/ia32.h> 937#include <asm/user32.h> 938 939#define R32(l,q) \ 940 case offsetof(struct user32, regs.l): \ 941 regs->q = value; break 942 943#define SEG32(rs) \ 944 case offsetof(struct user32, regs.rs): \ 945 return set_segment_reg(child, \ 946 offsetof(struct user_regs_struct, rs), \ 947 value); \ 948 break 949 950static int putreg32(struct task_struct *child, unsigned regno, u32 value) 951{ 952 struct pt_regs *regs = task_pt_regs(child); 953 954 switch (regno) { 955 956 SEG32(cs); 957 SEG32(ds); 958 SEG32(es); 959 SEG32(fs); 960 SEG32(gs); 961 SEG32(ss); 962 963 R32(ebx, bx); 964 R32(ecx, cx); 965 R32(edx, dx); 966 R32(edi, di); 967 R32(esi, si); 968 R32(ebp, bp); 969 R32(eax, ax); 970 R32(eip, ip); 971 R32(esp, sp); 972 973 case offsetof(struct user32, regs.orig_eax): 974 /* 975 * A 32-bit debugger setting orig_eax means to restore 976 * the state of the task restarting a 32-bit syscall. 977 * Make sure we interpret the -ERESTART* codes correctly 978 * in case the task is not actually still sitting at the 979 * exit from a 32-bit syscall with TS_COMPAT still set. 980 */ 981 regs->orig_ax = value; 982 if (syscall_get_nr(child, regs) >= 0) 983 task_thread_info(child)->status |= TS_COMPAT; 984 break; 985 986 case offsetof(struct user32, regs.eflags): 987 return set_flags(child, value); 988 989 case offsetof(struct user32, u_debugreg[0]) ... 
offsetof(struct user32, u_debugreg[7]):
		regno -= offsetof(struct user32, u_debugreg[0]);
		return ptrace_set_debugreg(child, regno / 4, value);

	default:
		if (regno > sizeof(struct user32) || (regno & 3))
			return -EIO;

		/*
		 * Other dummy fields in the virtual user structure
		 * are ignored
		 */
		break;
	}
	return 0;
}

#undef R32
#undef SEG32

/*
 * Case-label generators for the switch in getreg32(), which is keyed on a
 * byte offset into struct user32.
 *
 * R32(l, q)  - for user32 register slot 'l', store pt_regs field 'q' in *val.
 * SEG32(rs)  - for segment slot 'rs', fetch the value via get_segment_reg().
 *
 * Both expand against the locals 'regs', 'child' and 'val' of the function
 * they are used in.
 */
#define R32(l,q)							\
	case offsetof(struct user32, regs.l):				\
		*val = regs->q; break

#define SEG32(rs)							\
	case offsetof(struct user32, regs.rs):				\
		*val = get_segment_reg(child,				\
				       offsetof(struct user_regs_struct, rs)); \
		break

/*
 * getreg32() - read one 32-bit slot of the virtual struct user32 area.
 * @child: task whose register state is read
 * @regno: byte offset into struct user32
 * @val:   receives the 32-bit value
 *
 * Segment selectors are read through get_segment_reg(), eflags through
 * get_flags(), debug registers through ptrace_get_debugreg(), and the
 * remaining general registers directly from the task's pt_regs.  Any other
 * offset that passes the check in the default case reads back as 0.
 *
 * Returns 0 on success, or -EIO when @regno is not a multiple of 4 or
 * fails the range check.
 */
static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
{
	struct pt_regs *regs = task_pt_regs(child);

	switch (regno) {

	SEG32(ds);
	SEG32(es);
	SEG32(fs);
	SEG32(gs);

	R32(cs, cs);
	R32(ss, ss);
	R32(ebx, bx);
	R32(ecx, cx);
	R32(edx, dx);
	R32(edi, di);
	R32(esi, si);
	R32(ebp, bp);
	R32(eax, ax);
	R32(orig_eax, orig_ax);
	R32(eip, ip);
	R32(esp, sp);

	case offsetof(struct user32, regs.eflags):
		*val = get_flags(child);
		break;

	case offsetof(struct user32, u_debugreg[0]) ...
		offsetof(struct user32, u_debugreg[7]):
		/* Convert the byte offset into a debug register index (4 bytes each). */
		regno -= offsetof(struct user32, u_debugreg[0]);
		*val = ptrace_get_debugreg(child, regno / 4);
		break;

	default:
		/*
		 * NOTE(review): the bound is '>' rather than '>=', so an offset
		 * exactly equal to sizeof(struct user32) is accepted and reads
		 * as 0 -- confirm this is intended before tightening it, since
		 * it is visible to user space.
		 */
		if (regno > sizeof(struct user32) || (regno & 3))
			return -EIO;

		/*
		 * Other dummy fields in the virtual user structure
		 * are ignored
		 */
		*val = 0;
		break;
	}
	return 0;
}

#undef R32
#undef SEG32

/*
 * genregs32_get() - regset .get handler for the 32-bit general registers.
 * @target: task to read from
 * @regset: unused here
 * @pos:    starting byte offset, passed to getreg32() for each word
 * @count:  number of bytes requested
 * @kbuf:   kernel destination buffer, or NULL
 * @ubuf:   user destination buffer, used only when @kbuf is NULL
 *
 * Copies the range one compat_ulong_t at a time.  A trailing remainder
 * smaller than one word is not copied.  The return value of getreg32() is
 * not checked in either loop.
 *
 * Returns 0, or -EFAULT if a store to @ubuf fails.
 */
static int genregs32_get(struct task_struct *target,
			 const struct user_regset *regset,
			 unsigned int pos, unsigned int count,
			 void *kbuf, void __user *ubuf)
{
	if (kbuf) {
		compat_ulong_t *k = kbuf;
		while (count >= sizeof(*k)) {
			getreg32(target, pos, k++);
			count -= sizeof(*k);
			pos += sizeof(*k);
		}
	} else {
		compat_ulong_t __user *u = ubuf;
		while (count >= sizeof(*u)) {
			compat_ulong_t word;
			getreg32(target, pos, &word);
			/*
			 * NOTE(review): __put_user() is used, so the user range
			 * is presumably validated by the caller -- confirm.
			 */
			if (__put_user(word, u++))
				return -EFAULT;
			count -= sizeof(*u);
			pos += sizeof(*u);
		}
	}

	return 0;
}

/*
 * genregs32_set() - regset .set handler for the 32-bit general registers.
 * @target: task to modify
 * @regset: unused here
 * @pos:    starting byte offset, passed to putreg32() for each word
 * @count:  number of bytes supplied
 * @kbuf:   kernel source buffer, or NULL
 * @ubuf:   user source buffer, used only when @kbuf is NULL
 *
 * Writes the range one compat_ulong_t at a time and stops at the first
 * failure.  Words written before the failure stay written.
 *
 * Returns 0 on success, otherwise the first nonzero value returned by
 * __get_user() or putreg32().
 */
static int genregs32_set(struct task_struct *target,
			 const struct user_regset *regset,
			 unsigned int pos, unsigned int count,
			 const void *kbuf, const void __user *ubuf)
{
	int ret = 0;
	if (kbuf) {
		const compat_ulong_t *k = kbuf;
		while (count >= sizeof(*k) && !ret) {
			ret = putreg32(target, pos, *k++);
			count -= sizeof(*k);
			pos += sizeof(*k);
		}
	} else {
		const compat_ulong_t __user *u = ubuf;
		while (count >= sizeof(*u) && !ret) {
			compat_ulong_t word;
			ret = __get_user(word, u++);
			if (ret)
				break;
			ret = putreg32(target, pos, word);
			count -= sizeof(*u);
			pos += sizeof(*u);
		}
	}
	return ret;
}

#ifdef CONFIG_X86_X32_ABI
/*
 * x32_arch_ptrace() - ptrace request handler used by compat_arch_ptrace()
 * when the calling task is not an ia32 task.
 * @child:   traced task
 * @request: ptrace request code
 * @caddr:   request-specific address/offset (32-bit from the caller)
 * @cdata:   request-specific data, also used as a user pointer
 *
 * PEEKUSR/POKEUSR operate on offsets into the native struct user, but only
 * from the 'cs' slot upwards, and with sizeof(unsigned long) alignment.
 * The register-set requests use the regset view of the current task with
 * the native structure sizes.  Everything else is forwarded to
 * compat_ptrace_request().
 */
static long x32_arch_ptrace(struct task_struct *child,
			    compat_long_t request, compat_ulong_t caddr,
			    compat_ulong_t cdata)
{
	unsigned long addr = caddr;
	unsigned long data = cdata;
	void __user *datap = compat_ptr(data);
	int ret;

	switch (request) {
	/* Read 32bits at location addr in the USER area.  Only allow
	   to return the lower 32bits of segment and debug registers.  */
	case PTRACE_PEEKUSR: {
		u32 tmp;

		ret = -EIO;
		if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user) ||
		    addr < offsetof(struct user_regs_struct, cs))
			break;

		tmp = 0;  /* Default return condition */
		if (addr < sizeof(struct user_regs_struct))
			tmp = getreg(child, addr);
		else if (addr >= offsetof(struct user, u_debugreg[0]) &&
			 addr <= offsetof(struct user, u_debugreg[7])) {
			addr -= offsetof(struct user, u_debugreg[0]);
			tmp = ptrace_get_debugreg(child, addr / sizeof(data));
		}
		/* Only 32 bits are stored; tmp is a u32, so wider values are truncated. */
		ret = put_user(tmp, (__u32 __user *)datap);
		break;
	}

	/* Write the word at location addr in the USER area.  Only allow
	   to update segment and debug registers with the upper 32bits
	   zero-extended. */
	case PTRACE_POKEUSR:
		ret = -EIO;
		if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user) ||
		    addr < offsetof(struct user_regs_struct, cs))
			break;

		/*
		 * Offsets that are neither a register slot nor a debug
		 * register fall through both branches and keep ret == -EIO.
		 */
		if (addr < sizeof(struct user_regs_struct))
			ret = putreg(child, addr, data);
		else if (addr >= offsetof(struct user, u_debugreg[0]) &&
			 addr <= offsetof(struct user, u_debugreg[7])) {
			addr -= offsetof(struct user, u_debugreg[0]);
			ret = ptrace_set_debugreg(child,
						  addr / sizeof(data), data);
		}
		break;

	case PTRACE_GETREGS:	/* Get all gp regs from the child. */
		return copy_regset_to_user(child,
					   task_user_regset_view(current),
					   REGSET_GENERAL,
					   0, sizeof(struct user_regs_struct),
					   datap);

	case PTRACE_SETREGS:	/* Set all gp regs in the child. */
		return copy_regset_from_user(child,
					     task_user_regset_view(current),
					     REGSET_GENERAL,
					     0, sizeof(struct user_regs_struct),
					     datap);

	case PTRACE_GETFPREGS:	/* Get the child FPU state. */
		return copy_regset_to_user(child,
					   task_user_regset_view(current),
					   REGSET_FP,
					   0, sizeof(struct user_i387_struct),
					   datap);

	case PTRACE_SETFPREGS:	/* Set the child FPU state. */
		return copy_regset_from_user(child,
					     task_user_regset_view(current),
					     REGSET_FP,
					     0, sizeof(struct user_i387_struct),
					     datap);

	default:
		return compat_ptrace_request(child, request, addr, data);
	}

	return ret;
}
#endif

/*
 * compat_arch_ptrace() - architecture hook for ptrace requests issued
 * through the compat entry point.
 * @child:   traced task
 * @request: ptrace request code
 * @caddr:   request-specific address/offset
 * @cdata:   request-specific data, also used as a user pointer
 *
 * With CONFIG_X86_X32_ABI, callers for which is_ia32_task() is false are
 * handed to x32_arch_ptrace().  Otherwise PEEKUSR/POKEUSR go through
 * getreg32()/putreg32(), the register-set requests use user_x86_32_view
 * with the 32-bit structure sizes, the thread-area requests are passed to
 * arch_ptrace(), and anything else goes to compat_ptrace_request().
 *
 * Returns the result of whichever handler served the request.
 */
long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
			compat_ulong_t caddr, compat_ulong_t cdata)
{
	unsigned long addr = caddr;
	unsigned long data = cdata;
	void __user *datap = compat_ptr(data);
	int ret;
	__u32 val;

#ifdef CONFIG_X86_X32_ABI
	if (!is_ia32_task())
		return x32_arch_ptrace(child, request, caddr, cdata);
#endif

	switch (request) {
	case PTRACE_PEEKUSR:
		/* Only copy to user space when the register read succeeded. */
		ret = getreg32(child, addr, &val);
		if (ret == 0)
			ret = put_user(val, (__u32 __user *)datap);
		break;

	case PTRACE_POKEUSR:
		ret = putreg32(child, addr, data);
		break;

	case PTRACE_GETREGS:	/* Get all gp regs from the child. */
		return copy_regset_to_user(child, &user_x86_32_view,
					   REGSET_GENERAL,
					   0, sizeof(struct user_regs_struct32),
					   datap);

	case PTRACE_SETREGS:	/* Set all gp regs in the child. */
		return copy_regset_from_user(child, &user_x86_32_view,
					     REGSET_GENERAL, 0,
					     sizeof(struct user_regs_struct32),
					     datap);

	case PTRACE_GETFPREGS:	/* Get the child FPU state. */
		return copy_regset_to_user(child, &user_x86_32_view,
					   REGSET_FP, 0,
					   sizeof(struct user_i387_ia32_struct),
					   datap);

	case PTRACE_SETFPREGS:	/* Set the child FPU state. */
		return copy_regset_from_user(
			child, &user_x86_32_view, REGSET_FP,
			0, sizeof(struct user_i387_ia32_struct), datap);

	case PTRACE_GETFPXREGS:	/* Get the child extended FPU state. */
		return copy_regset_to_user(child, &user_x86_32_view,
					   REGSET_XFP, 0,
					   sizeof(struct user32_fxsr_struct),
					   datap);

	case PTRACE_SETFPXREGS:	/* Set the child extended FPU state. */
		return copy_regset_from_user(child, &user_x86_32_view,
					     REGSET_XFP, 0,
					     sizeof(struct user32_fxsr_struct),
					     datap);

	case PTRACE_GET_THREAD_AREA:
	case PTRACE_SET_THREAD_AREA:
		return arch_ptrace(child, request, addr, data);

	default:
		return compat_ptrace_request(child, request, addr, data);
	}

	return ret;
}

#endif	/* CONFIG_IA32_EMULATION */

#ifdef CONFIG_X86_64

/*
 * Register sets exported for native 64-bit tasks, indexed by enum
 * x86_regset.  REGSET_IOPERM64 is defined with the same value as
 * REGSET_XFP, so the I/O permission entry occupies that slot here.
 *
 * The REGSET_XSTATE entry has no .n initializer; it is filled in at run
 * time by update_regset_xstate_info().  The I/O permission entry has no
 * .set handler.
 */
static struct user_regset x86_64_regsets[] __read_mostly = {
	[REGSET_GENERAL] = {
		.core_note_type = NT_PRSTATUS,
		.n = sizeof(struct user_regs_struct) / sizeof(long),
		.size = sizeof(long), .align = sizeof(long),
		.get = genregs_get, .set = genregs_set
	},
	[REGSET_FP] = {
		.core_note_type = NT_PRFPREG,
		.n = sizeof(struct user_i387_struct) / sizeof(long),
		.size = sizeof(long), .align = sizeof(long),
		.active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
	},
	[REGSET_XSTATE] = {
		.core_note_type = NT_X86_XSTATE,
		.size = sizeof(u64), .align = sizeof(u64),
		.active = xstateregs_active, .get = xstateregs_get,
		.set = xstateregs_set
	},
	[REGSET_IOPERM64] = {
		.core_note_type = NT_386_IOPERM,
		.n = IO_BITMAP_LONGS,
		.size = sizeof(long), .align = sizeof(long),
		.active = ioperm_active, .get = ioperm_get
	},
};

/* View returned by task_user_regset_view() for 64-bit tasks. */
static const struct user_regset_view user_x86_64_view = {
	.name = "x86_64", .e_machine = EM_X86_64,
	.regsets = x86_64_regsets, .n = ARRAY_SIZE(x86_64_regsets)
};

#else  /* CONFIG_X86_32 */

/*
 * On a 32-bit kernel the "32" names used by the table below are simply
 * aliases for the native structure and handlers.
 */
#define user_regs_struct32	user_regs_struct
#define genregs32_get		genregs_get
#define genregs32_set		genregs_set

#endif	/* CONFIG_X86_64 */

#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
/*
 * Register sets exported for 32-bit tasks (native 32-bit kernel or IA32
 * emulation), indexed by enum x86_regset.
 *
 * As in the 64-bit table, REGSET_XSTATE gets its .n from
 * update_regset_xstate_info() and the I/O permission entry has no .set
 * handler.
 */
static struct user_regset x86_32_regsets[] __read_mostly = {
	[REGSET_GENERAL] = {
		.core_note_type = NT_PRSTATUS,
		.n = sizeof(struct user_regs_struct32) / sizeof(u32),
		.size = sizeof(u32), .align = sizeof(u32),
		.get = genregs32_get, .set = genregs32_set
	},
	[REGSET_FP] = {
		.core_note_type = NT_PRFPREG,
		.n = sizeof(struct user_i387_ia32_struct) / sizeof(u32),
		.size = sizeof(u32), .align = sizeof(u32),
		.active = fpregs_active, .get = fpregs_get, .set = fpregs_set
	},
	[REGSET_XFP] = {
		.core_note_type = NT_PRXFPREG,
		.n = sizeof(struct user32_fxsr_struct) / sizeof(u32),
		.size = sizeof(u32), .align = sizeof(u32),
		.active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
	},
	[REGSET_XSTATE] = {
		.core_note_type = NT_X86_XSTATE,
		.size = sizeof(u64), .align = sizeof(u64),
		.active = xstateregs_active, .get = xstateregs_get,
		.set = xstateregs_set
	},
	[REGSET_TLS] = {
		.core_note_type = NT_386_TLS,
		.n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN,
		.size = sizeof(struct user_desc),
		.align = sizeof(struct user_desc),
		.active = regset_tls_active,
		.get = regset_tls_get, .set = regset_tls_set
	},
	[REGSET_IOPERM32] = {
		.core_note_type = NT_386_IOPERM,
		.n = IO_BITMAP_BYTES / sizeof(u32),
		.size = sizeof(u32), .align = sizeof(u32),
		.active = ioperm_active, .get = ioperm_get
	},
};

/* View returned by task_user_regset_view() for 32-bit tasks. */
static const struct user_regset_view user_x86_32_view = {
	.name = "i386", .e_machine = EM_386,
	.regsets = x86_32_regsets, .n = ARRAY_SIZE(x86_32_regsets)
};
#endif

/*
 * This represents bytes 464..511 in the memory layout exported through
 * the REGSET_XSTATE interface.
 */
u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];

/*
 * update_regset_xstate_info() - record the run-time xstate layout.
 * @size:        size of the xstate area in bytes
 * @xstate_mask: value stored in the USER_XSTATE_XCR0_WORD slot of
 *               xstate_fx_sw_bytes
 *
 * Sets the element count of the REGSET_XSTATE entry in every regset table
 * that is compiled in (the entries are u64-sized, hence the division).
 */
void update_regset_xstate_info(unsigned int size, u64 xstate_mask)
{
#ifdef CONFIG_X86_64
	x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64);
#endif
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
	x86_32_regsets[REGSET_XSTATE].n = size / sizeof(u64);
#endif
	xstate_fx_sw_bytes[USER_XSTATE_XCR0_WORD] = xstate_mask;
}

/*
 * task_user_regset_view() - select the regset view that describes @task.
 *
 * The preprocessor blocks combine as follows:
 *  - CONFIG_IA32_EMULATION: the 32-bit view if @task has TIF_IA32 set,
 *    otherwise the 64-bit view;
 *  - CONFIG_X86_32: always the 32-bit view;
 *  - CONFIG_X86_64 without IA32 emulation: always the 64-bit view.
 */
const struct user_regset_view *task_user_regset_view(struct task_struct *task)
{
#ifdef CONFIG_IA32_EMULATION
	if (test_tsk_thread_flag(task, TIF_IA32))
#endif
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
		return &user_x86_32_view;
#endif
#ifdef CONFIG_X86_64
	return &user_x86_64_view;
#endif
}

/*
 * fill_sigtrap_info() - prepare a SIGTRAP siginfo and record trap state.
 * @tsk:        task the signal is for; its thread.trap_nr is set to
 *              X86_TRAP_DB and thread.error_code to @error_code
 * @regs:       register state used to derive si_addr
 * @error_code: stored in @tsk->thread.error_code
 * @si_code:    stored in @info->si_code
 * @info:       siginfo to fill; it is zeroed first
 *
 * si_addr is regs->ip when @regs is user mode and NULL otherwise.
 */
static void fill_sigtrap_info(struct task_struct *tsk,
				struct pt_regs *regs,
				int error_code, int si_code,
				struct siginfo *info)
{
	tsk->thread.trap_nr = X86_TRAP_DB;
	tsk->thread.error_code = error_code;

	memset(info, 0, sizeof(*info));
	info->si_signo = SIGTRAP;
	info->si_code = si_code;
	info->si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL;
}

/*
 * user_single_step_siginfo() - fill @info with the SIGTRAP description used
 * for a single-step report: error code 0 and si_code TRAP_BRKPT.
 */
void user_single_step_siginfo(struct task_struct *tsk,
				struct pt_regs *regs,
				struct siginfo *info)
{
	fill_sigtrap_info(tsk, regs, 0, TRAP_BRKPT, info);
}

/*
 * send_sigtrap() - build a SIGTRAP siginfo for @tsk and deliver it with
 * force_sig_info().
 */
void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
					 int error_code, int si_code)
{
	struct siginfo info;

	fill_sigtrap_info(tsk, regs, error_code, si_code, &info);
	/* Send us the fake SIGTRAP */
	force_sig_info(SIGTRAP, &info, tsk);
}

/*
 * do_audit_syscall_entry() - report a syscall entry to the audit code.
 *
 * Passes regs->orig_ax plus the first four argument registers.  Which
 * registers those are depends on @arch: di/si/dx/r10 for
 * AUDIT_ARCH_X86_64 (only compiled on CONFIG_X86_64), bx/cx/dx/si
 * otherwise.
 */
static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
{
#ifdef CONFIG_X86_64
	if (arch == AUDIT_ARCH_X86_64) {
		audit_syscall_entry(regs->orig_ax, regs->di,
				    regs->si, regs->dx, regs->r10);
	} else
#endif
	{
		audit_syscall_entry(regs->orig_ax, regs->bx,
				    regs->cx, regs->dx, regs->si);
	}
}

/*
 * We can return 0 to resume the syscall or anything else to go to phase
 * 2.  If we resume the syscall, we need to put something appropriate in
 * regs->orig_ax.
 *
 * NB: We don't have full pt_regs here, but regs->orig_ax and regs->ax
 * are fully functional.
 *
 * For phase 2's benefit, our return value is:
 * 0:			resume the syscall
 * 1:			go to phase 2; no seccomp phase 2 needed
 * anything else:	go to phase 2; pass return value to seccomp
 */
unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
{
	unsigned long ret = 0;
	u32 work;

	BUG_ON(regs != task_pt_regs(current));

	/*
	 * Snapshot the entry work bits once; each kind of work handled
	 * below is cleared from this local copy.
	 */
	work = ACCESS_ONCE(current_thread_info()->flags) &
		_TIF_WORK_SYSCALL_ENTRY;

	/*
	 * If TIF_NOHZ is set, we are required to call user_exit() before
	 * doing anything that could touch RCU.
	 */
	if (work & _TIF_NOHZ) {
		user_exit();
		work &= ~_TIF_NOHZ;
	}

#ifdef CONFIG_SECCOMP
	/*
	 * Do seccomp first -- it should minimize exposure of other
	 * code, and keeping seccomp fast is probably more valuable
	 * than the rest of this.
	 */
	if (work & _TIF_SECCOMP) {
		struct seccomp_data sd;

		sd.arch = arch;
		sd.nr = regs->orig_ax;
		sd.instruction_pointer = regs->ip;
		/* Argument registers differ between the two calling conventions. */
#ifdef CONFIG_X86_64
		if (arch == AUDIT_ARCH_X86_64) {
			sd.args[0] = regs->di;
			sd.args[1] = regs->si;
			sd.args[2] = regs->dx;
			sd.args[3] = regs->r10;
			sd.args[4] = regs->r8;
			sd.args[5] = regs->r9;
		} else
#endif
		{
			sd.args[0] = regs->bx;
			sd.args[1] = regs->cx;
			sd.args[2] = regs->dx;
			sd.args[3] = regs->si;
			sd.args[4] = regs->di;
			sd.args[5] = regs->bp;
		}

		/*
		 * The return-value encoding documented above relies on
		 * these two constants being 0 and 1.
		 */
		BUILD_BUG_ON(SECCOMP_PHASE1_OK != 0);
		BUILD_BUG_ON(SECCOMP_PHASE1_SKIP != 1);

		ret = seccomp_phase1(&sd);
		if (ret == SECCOMP_PHASE1_SKIP) {
			/* Skip the syscall: invalidate the number and carry on. */
			regs->orig_ax = -1;
			ret = 0;
		} else if (ret != SECCOMP_PHASE1_OK) {
			return ret;  /* Go directly to phase 2 */
		}

		work &= ~_TIF_SECCOMP;
	}
#endif

	/* Do our best to finish without phase 2. */
	if (work == 0)
		return ret;  /* seccomp and/or nohz only (ret == 0 here) */

#ifdef CONFIG_AUDITSYSCALL
	if (work == _TIF_SYSCALL_AUDIT) {
		/*
		 * If there is no more work to be done except auditing,
		 * then audit in phase 1.  Phase 2 always audits, so, if
		 * we audit here, then we can't go on to phase 2.
		 */
		do_audit_syscall_entry(regs, arch);
		return 0;
	}
#endif

	return 1;  /* Something is enabled that we can't handle in phase 1 */
}

/*
 * Returns the syscall nr to run (which should match regs->orig_ax).
 *
 * @phase1_result is the value returned by syscall_trace_enter_phase1();
 * values greater than 1 are handed to seccomp_phase2().
 *
 * The result is -1 when seccomp_phase2() returns nonzero (in which case
 * none of the later hooks run), when TIF_SYSCALL_EMU is set, or when
 * tracehook_report_syscall_entry() returns nonzero.  In the last two
 * cases the tracepoint and audit hooks still run before returning.
 */
long syscall_trace_enter_phase2(struct pt_regs *regs, u32 arch,
				unsigned long phase1_result)
{
	long ret = 0;
	u32 work = ACCESS_ONCE(current_thread_info()->flags) &
		_TIF_WORK_SYSCALL_ENTRY;

	BUG_ON(regs != task_pt_regs(current));

	/*
	 * If we stepped into a sysenter/syscall insn, it trapped in
	 * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
	 * If user-mode had set TF itself, then it's still clear from
	 * do_debug() and we need to set it again to restore the user
	 * state.  If we entered on the slow path, TF was already set.
	 */
	if (work & _TIF_SINGLESTEP)
		regs->flags |= X86_EFLAGS_TF;

#ifdef CONFIG_SECCOMP
	/*
	 * Call seccomp_phase2 before running the other hooks so that
	 * they can see any changes made by a seccomp tracer.
	 */
	if (phase1_result > 1 && seccomp_phase2(phase1_result)) {
		/* seccomp failures shouldn't expose any additional code. */
		return -1;
	}
#endif

	if (unlikely(work & _TIF_SYSCALL_EMU))
		ret = -1L;

	/*
	 * The tracer is notified when the syscall is being emulated
	 * (ret already nonzero) or when TIF_SYSCALL_TRACE is set; a nonzero
	 * report result also forces -1.
	 */
	if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
	    tracehook_report_syscall_entry(regs))
		ret = -1L;

	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
		trace_sys_enter(regs, regs->orig_ax);

	do_audit_syscall_entry(regs, arch);

	/* -1 if set above, otherwise the (possibly rewritten) syscall number. */
	return ret ?: regs->orig_ax;
}

/*
 * syscall_trace_enter() - run both syscall-entry phases back to back.
 *
 * Chooses the audit architecture from is_ia32_task(), runs phase 1 and
 * only enters phase 2 when phase 1 returned nonzero.  Returns
 * regs->orig_ax when phase 1 was sufficient, otherwise the result of
 * phase 2.
 */
long syscall_trace_enter(struct pt_regs *regs)
{
	u32 arch = is_ia32_task() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
	unsigned long phase1_result = syscall_trace_enter_phase1(regs, arch);

	if (phase1_result == 0)
		return regs->orig_ax;
	else
		return syscall_trace_enter_phase2(regs, arch, phase1_result);
}

/*
 * syscall_trace_leave() - syscall-exit counterpart of the entry hooks.
 *
 * Between user_exit() and user_enter() it runs the audit exit hook, the
 * sys_exit tracepoint when TIF_SYSCALL_TRACEPOINT is set, and the tracer
 * exit report when single-stepping (without TIF_SYSCALL_EMU) or when
 * TIF_SYSCALL_TRACE is set.
 */
void syscall_trace_leave(struct pt_regs *regs)
{
	bool step;

	/*
	 * We may come here right after calling schedule_user()
	 * or do_notify_resume(), in which case we can be in RCU
	 * user mode.
	 */
	user_exit();

	audit_syscall_exit(regs);

	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
		trace_sys_exit(regs, regs->ax);

	/*
	 * If TIF_SYSCALL_EMU is set, we only get here because of
	 * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
	 * We already reported this syscall instruction in
	 * syscall_trace_enter().
	 */
	step = unlikely(test_thread_flag(TIF_SINGLESTEP)) &&
			!test_thread_flag(TIF_SYSCALL_EMU);
	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
		tracehook_report_syscall_exit(regs, step);

	user_enter();
}