arch/x86/kernel/kprobes/opt.c


DEFINITIONS

This source file includes the following definitions.
  1. __recover_optprobed_insn
  2. synthesize_set_arg1
  3. optimized_callback
  4. copy_optimized_instructions
  5. __insn_is_indirect_jump
  6. insn_jump_into_range
  7. insn_is_indirect_jump
  8. can_optimize
  9. arch_check_optimized_kprobe
  10. arch_within_optimized_kprobe
  11. __arch_remove_optimized_kprobe
  12. arch_remove_optimized_kprobe
  13. arch_prepare_optimized_kprobe
  14. arch_optimize_kprobes
  15. arch_unoptimize_kprobe
  16. arch_unoptimize_kprobes
  17. setup_detour_execution

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  Kernel Probes Jump Optimization (Optprobes)
 *
 * Copyright (C) IBM Corporation, 2002, 2004
 * Copyright (C) Hitachi Ltd., 2012
 */
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/preempt.h>
#include <linux/extable.h>
#include <linux/kdebug.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
#include <linux/frame.h>

#include <asm/text-patching.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <linux/uaccess.h>
#include <asm/alternative.h>
#include <asm/insn.h>
#include <asm/debugreg.h>
#include <asm/set_memory.h>
#include <asm/sections.h>
#include <asm/nospec-branch.h>

#include "common.h"

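/*
 * Recover the original instruction bytes that an optprobe's relative jump
 * may have overwritten.  An optimized kprobe replaces RELATIVEJUMP_SIZE
 * bytes at kp->addr (on x86 a 5-byte "jmp rel32": the 0xe9 opcode followed
 * by a 32-bit displacement).  The first original byte is kept in kp->opcode
 * and the following RELATIVE_ADDR_SIZE bytes in op->optinsn.copied_insn,
 * so a caller decoding text that falls inside those bytes can reconstruct
 * the original instruction stream into @buf.
 */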
unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
        struct optimized_kprobe *op;
        struct kprobe *kp;
        long offs;
        int i;

        for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
                kp = get_kprobe((void *)addr - i);
                /* This function only handles jump-optimized kprobes */
                if (kp && kprobe_optimized(kp)) {
                        op = container_of(kp, struct optimized_kprobe, kp);
                        /* If op->list is not empty, op is under optimizing */
                        if (list_empty(&op->list))
                                goto found;
                }
        }

        return addr;
found:
        /*
         * If the kprobe is optimized, the original bytes at this address
         * may have been overwritten by the jump destination address.  In
         * that case, the original bytes must be recovered from the
         * op->optinsn.copied_insn buffer.
         */
        if (probe_kernel_read(buf, (void *)addr,
                MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
                return 0UL;

        if (addr == (unsigned long)kp->addr) {
                buf[0] = kp->opcode;
                memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
        } else {
                offs = addr - (unsigned long)kp->addr - 1;
                memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
        }

        return (unsigned long)buf;
}

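/*
 * The move emitted below is what later fills the ASM_NOP5 padding at
 * optprobe_template_val: on x86-64 it is "48 bf <imm64>"
 * (movabs $val, %rdi, 10 bytes), on x86-32 it is "b8 <imm32>"
 * (mov $val, %eax, 5 bytes).  In both cases the destination register
 * carries the first argument of optimized_callback().
 */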
/* Insert a move instruction which loads a pointer into eax/rdi (the 1st argument). */
static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
{
#ifdef CONFIG_X86_64
        *addr++ = 0x48;
        *addr++ = 0xbf;
#else
        *addr++ = 0xb8;
#endif
        *(unsigned long *)addr = val;
}

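/*
 * Template for the out-of-line detour code.  It builds a 'struct pt_regs'
 * on the stack (a fake sp and the flags are pushed first, then the general
 * purpose registers via SAVE_REGS_STRING), loads the address of the
 * optimized_kprobe into the first-argument register (patched over the NOPs
 * at optprobe_template_val), passes the pt_regs pointer as the second
 * argument and calls optimized_callback() (patched over the NOP at
 * optprobe_template_call), then restores the registers and flags.  The
 * template is copied for each optprobe, and the copied original
 * instructions plus a jump back are appended after optprobe_template_end.
 */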
asm (
                        ".pushsection .rodata\n"
                        "optprobe_template_func:\n"
                        ".global optprobe_template_entry\n"
                        "optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
                        /* We don't bother saving the ss register */
                        "       pushq %rsp\n"
                        "       pushfq\n"
                        SAVE_REGS_STRING
                        "       movq %rsp, %rsi\n"
                        ".global optprobe_template_val\n"
                        "optprobe_template_val:\n"
                        ASM_NOP5
                        ASM_NOP5
                        ".global optprobe_template_call\n"
                        "optprobe_template_call:\n"
                        ASM_NOP5
                        /* Copy the saved flags into the saved-rsp slot so popfq below restores them */
                        "       movq 18*8(%rsp), %rdx\n"
                        "       movq %rdx, 19*8(%rsp)\n"
                        RESTORE_REGS_STRING
                        /* Skip flags entry */
                        "       addq $8, %rsp\n"
                        "       popfq\n"
#else /* CONFIG_X86_32 */
                        "       pushl %esp\n"
                        "       pushfl\n"
                        SAVE_REGS_STRING
                        "       movl %esp, %edx\n"
                        ".global optprobe_template_val\n"
                        "optprobe_template_val:\n"
                        ASM_NOP5
                        ".global optprobe_template_call\n"
                        "optprobe_template_call:\n"
                        ASM_NOP5
                        /* Copy the saved flags into the saved-esp slot so popfl below restores them */
                        "       movl 14*4(%esp), %edx\n"
                        "       movl %edx, 15*4(%esp)\n"
                        RESTORE_REGS_STRING
                        /* Skip flags entry */
                        "       addl $4, %esp\n"
                        "       popfl\n"
#endif
                        ".global optprobe_template_end\n"
                        "optprobe_template_end:\n"
                        ".popsection\n");

void optprobe_template_func(void);
STACK_FRAME_NON_STANDARD(optprobe_template_func);

#define TMPL_MOVE_IDX \
        ((long)optprobe_template_val - (long)optprobe_template_entry)
#define TMPL_CALL_IDX \
        ((long)optprobe_template_call - (long)optprobe_template_entry)
#define TMPL_END_IDX \
        ((long)optprobe_template_end - (long)optprobe_template_entry)

#define INT3_SIZE sizeof(kprobe_opcode_t)

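/*
 * Layout of an optprobe detour buffer, built by arch_prepare_optimized_kprobe():
 *
 *   slot + 0              template code (optprobe_template_entry..end)
 *   slot + TMPL_MOVE_IDX  'mov $op, %arg1' patched over the NOPs
 *   slot + TMPL_CALL_IDX  'call optimized_callback' patched over the NOP
 *   slot + TMPL_END_IDX   copied original instructions (op->optinsn.size bytes)
 *   ... followed by       a jump back to op->kp.addr + op->optinsn.size
 */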
/* Optimized kprobe callback function: called from the optimized instruction slot */
static void
optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
        /* This is possible if op is under delayed unoptimization */
        if (kprobe_disabled(&op->kp))
                return;

        preempt_disable();
        if (kprobe_running()) {
                kprobes_inc_nmissed_count(&op->kp);
        } else {
                struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
                /* Fill in the pt_regs fields the template skipped (cs, ip, orig_ax) */
                regs->cs = __KERNEL_CS;
#ifdef CONFIG_X86_32
                regs->cs |= get_kernel_rpl();
                regs->gs = 0;
#endif
                regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
                regs->orig_ax = ~0UL;

                __this_cpu_write(current_kprobe, &op->kp);
                kcb->kprobe_status = KPROBE_HIT_ACTIVE;
                opt_pre_handler(&op->kp, regs);
                __this_cpu_write(current_kprobe, NULL);
        }
        preempt_enable();
}
NOKPROBE_SYMBOL(optimized_callback);

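/*
 * Copy whole instructions from @src until at least RELATIVEJUMP_SIZE bytes
 * are covered, so the 5-byte jump never splits an instruction.  Every
 * copied instruction must be relocatable and boostable, and the range must
 * not be reserved by ftrace, alternatives or jump labels.  Returns the
 * copied length or a negative error code.
 */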
static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
{
        struct insn insn;
        int len = 0, ret;

        while (len < RELATIVEJUMP_SIZE) {
                ret = __copy_instruction(dest + len, src + len, real + len, &insn);
                if (!ret || !can_boost(&insn, src + len))
                        return -EINVAL;
                len += ret;
        }
        /* Check whether the address range is reserved */
        if (ftrace_text_reserved(src, src + len - 1) ||
            alternatives_text_reserved(src, src + len - 1) ||
            jump_label_text_reserved(src, src + len - 1))
                return -EBUSY;

        return len;
}

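/*
 * Indirect jumps make the whole function unoptimizable because their
 * targets cannot be known at preparation time, so there is no way to prove
 * that nothing jumps back into the bytes replaced by the optprobe.
 * Opcode 0xff with a ModRM reg field of 4 or 5 (which is what the
 * '& 6) == 4' test matches) is a near/far indirect jump; 0xea is a direct
 * far (segment-based) jump.
 */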
/* Check whether insn is an indirect jump */
static int __insn_is_indirect_jump(struct insn *insn)
{
        return ((insn->opcode.bytes[0] == 0xff &&
                (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
                insn->opcode.bytes[0] == 0xea); /* Segment based jump */
}

/* Check whether insn jumps into the specified address range */
static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
{
        unsigned long target = 0;

        switch (insn->opcode.bytes[0]) {
        case 0xe0:      /* loopne */
        case 0xe1:      /* loope */
        case 0xe2:      /* loop */
        case 0xe3:      /* jcxz */
        case 0xe9:      /* near relative jump */
        case 0xeb:      /* short relative jump */
                break;
        case 0x0f:
                if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
                        break;
                return 0;
        default:
                if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
                        break;
                return 0;
        }
        target = (unsigned long)insn->next_byte + insn->immediate.value;

        return (start <= target && target <= start + len);
}

static int insn_is_indirect_jump(struct insn *insn)
{
        int ret = __insn_is_indirect_jump(insn);

#ifdef CONFIG_RETPOLINE
        /*
         * A jump to an x86_indirect_thunk_* retpoline is treated as an
         * indirect jump.  Note that even with CONFIG_RETPOLINE=y, a kernel
         * compiled with an older gcc may still emit real indirect jumps,
         * so this check is added on top of, rather than replacing, the
         * indirect-jump check above.
         */
        if (!ret)
                ret = insn_jump_into_range(insn,
                                (unsigned long)__indirect_thunk_start,
                                (unsigned long)__indirect_thunk_end -
                                (unsigned long)__indirect_thunk_start);
#endif
        return ret;
}

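/*
 * Optimizing a kprobe at @paddr overwrites the RELATIVE_ADDR_SIZE bytes
 * following the int3, i.e. paddr + 1 .. paddr + 4.  If any instruction in
 * the same function could branch to one of those addresses, execution
 * would land in the middle of the new jump, so the whole function is
 * decoded below and such probes are rejected.  Functions in the entry and
 * irq-entry text, or containing exception-fixup targets or indirect
 * jumps, are rejected as well.
 */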
/* Decode the whole function to ensure no instruction jumps into the target */
static int can_optimize(unsigned long paddr)
{
        unsigned long addr, size = 0, offset = 0;
        struct insn insn;
        kprobe_opcode_t buf[MAX_INSN_SIZE];

        /* Lookup symbol including addr */
        if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
                return 0;

        /*
         * Do not optimize in the entry code due to the unstable
         * stack handling and registers setup.
         */
        if (((paddr >= (unsigned long)__entry_text_start) &&
             (paddr <  (unsigned long)__entry_text_end)) ||
            ((paddr >= (unsigned long)__irqentry_text_start) &&
             (paddr <  (unsigned long)__irqentry_text_end)))
                return 0;

        /* Check there is enough space for a relative jump. */
        if (size - offset < RELATIVEJUMP_SIZE)
                return 0;

        /* Decode instructions */
        addr = paddr - offset;
        while (addr < paddr - offset + size) { /* Decode until function end */
                unsigned long recovered_insn;
                if (search_exception_tables(addr))
                        /*
                         * Since some fixup code jumps into this function,
                         * we can't optimize a kprobe in this function.
                         */
                        return 0;
                recovered_insn = recover_probed_instruction(buf, addr);
                if (!recovered_insn)
                        return 0;
                kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
                insn_get_length(&insn);
                /* Another subsystem has put a breakpoint there */
                if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
                        return 0;
                /* Recover address */
                insn.kaddr = (void *)addr;
                insn.next_byte = (void *)(addr + insn.length);
                /* Check that no instruction jumps into the target */
                if (insn_is_indirect_jump(&insn) ||
                    insn_jump_into_range(&insn, paddr + INT3_SIZE,
                                         RELATIVE_ADDR_SIZE))
                        return 0;
                addr += insn.length;
        }

        return 1;
}

/* Check whether the optimized_kprobe can actually be optimized. */
int arch_check_optimized_kprobe(struct optimized_kprobe *op)
{
        int i;
        struct kprobe *p;

        for (i = 1; i < op->optinsn.size; i++) {
                p = get_kprobe(op->kp.addr + i);
                if (p && !kprobe_disabled(p))
                        return -EEXIST;
        }

        return 0;
}

/* Check whether the addr is within the optimized instructions. */
int arch_within_optimized_kprobe(struct optimized_kprobe *op,
                                 unsigned long addr)
{
        return ((unsigned long)op->kp.addr <= addr &&
                (unsigned long)op->kp.addr + op->optinsn.size > addr);
}

/* Free optimized instruction slot */
static
void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
{
        if (op->optinsn.insn) {
                free_optinsn_slot(op->optinsn.insn, dirty);
                op->optinsn.insn = NULL;
                op->optinsn.size = 0;
        }
}

void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
{
        __arch_remove_optimized_kprobe(op, 1);
}

/*
 * Copy the instructions that will be replaced by the relative jump.
 * The target instructions MUST be relocatable (this is checked inside).
 * This is called when a new aggregated (optimized) probe is allocated or reused.
 */
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
                                  struct kprobe *__unused)
{
        u8 *buf = NULL, *slot;
        int ret, len;
        long rel;

        if (!can_optimize((unsigned long)op->kp.addr))
                return -EILSEQ;

        buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        op->optinsn.insn = slot = get_optinsn_slot();
        if (!slot) {
                ret = -ENOMEM;
                goto out;
        }

        /*
         * Verify that the address gap is within the 2GB range reachable
         * by a relative jump.
         */
        rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE;
        if (abs(rel) > 0x7fffffff) {
                ret = -ERANGE;
                goto err;
        }

        /* Copy the arch-dependent instance from the template */
        memcpy(buf, optprobe_template_entry, TMPL_END_IDX);

        /* Copy instructions into the out-of-line buffer */
        ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr,
                                          slot + TMPL_END_IDX);
        if (ret < 0)
                goto err;
        op->optinsn.size = ret;
        len = TMPL_END_IDX + op->optinsn.size;

        /* Set probe information */
        synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);

        /* Set probe function call */
        synthesize_relcall(buf + TMPL_CALL_IDX,
                           slot + TMPL_CALL_IDX, optimized_callback);

        /* Set the returning jmp instruction at the tail of the out-of-line buffer */
        synthesize_reljump(buf + len, slot + len,
                           (u8 *)op->kp.addr + op->optinsn.size);
        len += RELATIVEJUMP_SIZE;

        /* We have to use text_poke() for the instruction buffer because it is RO */
        text_poke(slot, buf, len);
        ret = 0;
out:
        kfree(buf);
        return ret;

err:
        __arch_remove_optimized_kprobe(op, 0);
        goto out;
}

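/*
 * The jump written over the probed instruction is "e9 <rel32>"
 * (RELATIVEJUMP_OPCODE plus a 32-bit displacement), where the displacement
 * is computed relative to the end of the 5-byte instruction.  The four
 * original bytes after the int3 are saved in op->optinsn.copied_insn
 * first, so they can be restored on unoptimize and recovered by
 * __recover_optprobed_insn().  text_poke_bp() performs breakpoint-based
 * cross-CPU safe patching; its last argument directs any CPU that hits
 * the temporary int3 while patching is in progress to the detour code.
 */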
/*
 * Replace breakpoints (int3) with relative jumps.
 * The caller must hold kprobe_mutex and text_mutex.
 */
void arch_optimize_kprobes(struct list_head *oplist)
{
        struct optimized_kprobe *op, *tmp;
        u8 insn_buff[RELATIVEJUMP_SIZE];

        list_for_each_entry_safe(op, tmp, oplist, list) {
                s32 rel = (s32)((long)op->optinsn.insn -
                        ((long)op->kp.addr + RELATIVEJUMP_SIZE));

                WARN_ON(kprobe_disabled(&op->kp));

                /* Back up the instruction bytes that will be replaced by the jump displacement */
                memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
                       RELATIVE_ADDR_SIZE);

                insn_buff[0] = RELATIVEJUMP_OPCODE;
                *(s32 *)(&insn_buff[1]) = rel;

                text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
                             op->optinsn.insn);

                list_del_init(&op->list);
        }
}

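/*
 * Unoptimizing does not restore the original first byte: the kprobe is
 * still armed, so the probed address must keep its int3.  Only the four
 * displacement bytes of the jump are replaced with the original bytes
 * saved in op->optinsn.copied_insn.
 */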
/* Replace a relative jump with a breakpoint (int3).  */
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
        u8 insn_buff[RELATIVEJUMP_SIZE];

        /* Set int3 to first byte for kprobes */
        insn_buff[0] = BREAKPOINT_INSTRUCTION;
        memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
        text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
                     op->optinsn.insn);
}

/*
 * Recover original instructions and breakpoints from relative jumps.
 * The caller must hold kprobe_mutex.
 */
extern void arch_unoptimize_kprobes(struct list_head *oplist,
                                    struct list_head *done_list)
{
        struct optimized_kprobe *op, *tmp;

        list_for_each_entry_safe(op, tmp, oplist, list) {
                arch_unoptimize_kprobe(op);
                list_move(&op->list, done_list);
        }
}

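/*
 * Called from the int3 kprobe handler.  If the probe is already
 * jump-optimized, there is no need to single-step: execution can resume
 * directly in the detour buffer, right after the template, where the
 * copied original instructions (followed by a jump back) live.
 */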
int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
{
        struct optimized_kprobe *op;

        if (p->flags & KPROBE_FLAG_OPTIMIZED) {
                /* This kprobe is really able to run the optimized path. */
                op = container_of(p, struct optimized_kprobe, kp);
                /* Detour through copied instructions */
                regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
                if (!reenter)
                        reset_current_kprobe();
                return 1;
        }
        return 0;
}
NOKPROBE_SYMBOL(setup_detour_execution);
