/**
 * kmemcheck - a heavyweight memory checker for the linux kernel
 * Copyright (C) 2007, 2008 Vegard Nossum <vegardno@ifi.uio.no>
 * (With a lot of help from Ingo Molnar and Pekka Enberg.)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2) as
 * published by the Free Software Foundation.
 */

#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/kernel.h>
#include <linux/kmemcheck.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/page-flags.h>
#include <linux/percpu.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/types.h>

#include <asm/cacheflush.h>
#include <asm/kmemcheck.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

#include "error.h"
#include "opcode.h"
#include "pte.h"
#include "selftest.h"
#include "shadow.h"


#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT
# define KMEMCHECK_ENABLED 0
#endif

#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT
# define KMEMCHECK_ENABLED 1
#endif

#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT
# define KMEMCHECK_ENABLED 2
#endif

int kmemcheck_enabled = KMEMCHECK_ENABLED;

int __init kmemcheck_init(void)
{
#ifdef CONFIG_SMP
	/*
	 * Limit SMP to use a single CPU. We rely on the fact that this code
	 * runs before SMP is set up.
	 */
	if (setup_max_cpus > 1) {
		printk(KERN_INFO
			"kmemcheck: Limiting number of CPUs to 1.\n");
		setup_max_cpus = 1;
	}
#endif

	if (!kmemcheck_selftest()) {
		printk(KERN_INFO "kmemcheck: self-tests failed; disabling\n");
		kmemcheck_enabled = 0;
		return -EINVAL;
	}

	printk(KERN_INFO "kmemcheck: Initialized\n");
	return 0;
}

early_initcall(kmemcheck_init);

/*
 * We need to parse the kmemcheck= option before any memory is allocated.
 */
static int __init param_kmemcheck(char *str)
{
	int val;
	int ret;

	if (!str)
		return -EINVAL;

	ret = kstrtoint(str, 0, &val);
	if (ret)
		return ret;
	kmemcheck_enabled = val;
	return 0;
}

early_param("kmemcheck", param_kmemcheck);

int kmemcheck_show_addr(unsigned long address)
{
	pte_t *pte;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return 0;

	set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
	__flush_tlb_one(address);
	return 1;
}

int kmemcheck_hide_addr(unsigned long address)
{
	pte_t *pte;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return 0;

	set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
	__flush_tlb_one(address);
	return 1;
}

struct kmemcheck_context {
	bool busy;
	int balance;

	/*
	 * There can be at most two memory operands to an instruction, but
	 * each address can cross a page boundary -- so we may need up to
	 * four addresses that must be hidden/revealed for each fault.
	 */
	unsigned long addr[4];
	unsigned long n_addrs;
	unsigned long flags;

	/* Data size of the instruction that caused a fault. */
	unsigned int size;
};
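/*
 * Per-CPU state for the fault/single-step round trip. In rough outline:
 * an access to a tracked (non-present) page takes #PF, where
 * kmemcheck_access() decodes the instruction and checks or updates the
 * shadow bytes; kmemcheck_show() then makes the page(s) present again and
 * sets TF so that only the faulting instruction is re-executed; the
 * resulting #DB ends up in kmemcheck_hide(), which makes the page(s)
 * non-present again. "balance" counts outstanding show operations and
 * should never be anything but 0 or 1.
 */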

static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context);

bool kmemcheck_active(struct pt_regs *regs)
{
	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);

	return data->balance > 0;
}

/* Save an address that needs to be shown/hidden */
static void kmemcheck_save_addr(unsigned long addr)
{
	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);

	BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr));
	data->addr[data->n_addrs++] = addr;
}

static unsigned int kmemcheck_show_all(void)
{
	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
	unsigned int i;
	unsigned int n;

	n = 0;
	for (i = 0; i < data->n_addrs; ++i)
		n += kmemcheck_show_addr(data->addr[i]);

	return n;
}

static unsigned int kmemcheck_hide_all(void)
{
	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
	unsigned int i;
	unsigned int n;

	n = 0;
	for (i = 0; i < data->n_addrs; ++i)
		n += kmemcheck_hide_addr(data->addr[i]);

	return n;
}

/*
 * Called from the #PF handler.
 */
void kmemcheck_show(struct pt_regs *regs)
{
	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);

	BUG_ON(!irqs_disabled());

	if (unlikely(data->balance != 0)) {
		kmemcheck_show_all();
		kmemcheck_error_save_bug(regs);
		data->balance = 0;
		return;
	}

	/*
	 * None of the addresses actually belonged to kmemcheck. Note that
	 * this is not an error.
	 */
	if (kmemcheck_show_all() == 0)
		return;

	++data->balance;

	/*
	 * The IF needs to be cleared as well, so that the faulting
	 * instruction can run "uninterrupted". Otherwise, we might take
	 * an interrupt and start executing that before we've had a chance
	 * to hide the page again.
	 *
	 * NOTE: In the rare case of multiple faults, we must not override
	 * the original flags:
	 */
	if (!(regs->flags & X86_EFLAGS_TF))
		data->flags = regs->flags;

	regs->flags |= X86_EFLAGS_TF;
	regs->flags &= ~X86_EFLAGS_IF;
}

/*
 * Called from the #DB handler.
 */
void kmemcheck_hide(struct pt_regs *regs)
{
	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
	int n;

	BUG_ON(!irqs_disabled());

	if (unlikely(data->balance != 1)) {
		kmemcheck_show_all();
		kmemcheck_error_save_bug(regs);
		data->n_addrs = 0;
		data->balance = 0;

		if (!(data->flags & X86_EFLAGS_TF))
			regs->flags &= ~X86_EFLAGS_TF;
		if (data->flags & X86_EFLAGS_IF)
			regs->flags |= X86_EFLAGS_IF;
		return;
	}

	if (kmemcheck_enabled)
		n = kmemcheck_hide_all();
	else
		n = kmemcheck_show_all();

	if (n == 0)
		return;

	--data->balance;

	data->n_addrs = 0;

	if (!(data->flags & X86_EFLAGS_TF))
		regs->flags &= ~X86_EFLAGS_TF;
	if (data->flags & X86_EFLAGS_IF)
		regs->flags |= X86_EFLAGS_IF;
}

void kmemcheck_show_pages(struct page *p, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; ++i) {
		unsigned long address;
		pte_t *pte;
		unsigned int level;

		address = (unsigned long) page_address(&p[i]);
		pte = lookup_address(address, &level);
		BUG_ON(!pte);
		BUG_ON(level != PG_LEVEL_4K);

		set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
		set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN));
		__flush_tlb_one(address);
	}
}

bool kmemcheck_page_is_tracked(struct page *p)
{
	/* This will also check the "hidden" flag of the PTE. */
	return kmemcheck_pte_lookup((unsigned long) page_address(p));
}

void kmemcheck_hide_pages(struct page *p, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; ++i) {
		unsigned long address;
		pte_t *pte;
		unsigned int level;

		address = (unsigned long) page_address(&p[i]);
		pte = lookup_address(address, &level);
		BUG_ON(!pte);
		BUG_ON(level != PG_LEVEL_4K);

		set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
		set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN));
		__flush_tlb_one(address);
	}
}

/* Access may NOT cross page boundary */
static void kmemcheck_read_strict(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	void *shadow;
	enum kmemcheck_shadow status;

	shadow = kmemcheck_shadow_lookup(addr);
	if (!shadow)
		return;

	kmemcheck_save_addr(addr);
	status = kmemcheck_shadow_test(shadow, size);
	if (status == KMEMCHECK_SHADOW_INITIALIZED)
		return;

	if (kmemcheck_enabled)
		kmemcheck_error_save(status, addr, size, regs);

	if (kmemcheck_enabled == 2)
		kmemcheck_enabled = 0;

	/* Don't warn about it again. */
	kmemcheck_shadow_set(shadow, size);
}

bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
{
	enum kmemcheck_shadow status;
	void *shadow;

	shadow = kmemcheck_shadow_lookup(addr);
	if (!shadow)
		return true;

	status = kmemcheck_shadow_test_all(shadow, size);

	return status == KMEMCHECK_SHADOW_INITIALIZED;
}
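
/*
 * A note on the shadow bytes used above: every byte of a tracked
 * allocation has a corresponding shadow byte describing its state
 * (e.g. KMEMCHECK_SHADOW_INITIALIZED once it has been written to).
 * kmemcheck_shadow_test() inspects the shadow bytes covering an access,
 * so for example a 4-byte load whose last byte was never stored to is
 * flagged as an uninitialized read. kmemcheck_is_obj_initialized() is
 * the non-static variant meant for callers outside kmemcheck (the
 * kmemleak scanner, for instance) that want to skip data kmemcheck
 * still considers uninitialized.
 */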

/* Access may cross page boundary */
static void kmemcheck_read(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	unsigned long page = addr & PAGE_MASK;
	unsigned long next_addr = addr + size - 1;
	unsigned long next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		kmemcheck_read_strict(regs, addr, size);
		return;
	}

	/*
	 * What we do is basically to split the access across the
	 * two pages and handle each part separately. Yes, this means
	 * that we may now see reads that are 3 + 5 bytes, for
	 * example (and if both are uninitialized, there will be two
	 * reports), but it makes the code a lot simpler.
	 */
	kmemcheck_read_strict(regs, addr, next_page - addr);
	kmemcheck_read_strict(regs, next_page, next_addr - next_page + 1);
}

static void kmemcheck_write_strict(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	void *shadow;

	shadow = kmemcheck_shadow_lookup(addr);
	if (!shadow)
		return;

	kmemcheck_save_addr(addr);
	kmemcheck_shadow_set(shadow, size);
}

static void kmemcheck_write(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	unsigned long page = addr & PAGE_MASK;
	unsigned long next_addr = addr + size - 1;
	unsigned long next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		kmemcheck_write_strict(regs, addr, size);
		return;
	}

	/* See comment in kmemcheck_read(). */
	kmemcheck_write_strict(regs, addr, next_page - addr);
	kmemcheck_write_strict(regs, next_page, next_addr - next_page + 1);
}
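
/*
 * Worked example of the split above (illustrative only, assuming a 4 KiB
 * page size): an 8-byte read at addr == 0x1ffd has next_addr == 0x2004
 * and next_page == 0x2000, so it is handled as next_page - addr == 3
 * bytes at 0x1ffd plus next_addr - next_page + 1 == 5 bytes at 0x2000 --
 * the "3 + 5" case mentioned in kmemcheck_read().
 */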

/*
 * Copying is hard. We have two addresses, each of which may be split across
 * a page (and each page will have different shadow addresses).
 */
static void kmemcheck_copy(struct pt_regs *regs,
	unsigned long src_addr, unsigned long dst_addr, unsigned int size)
{
	uint8_t shadow[8];
	enum kmemcheck_shadow status;

	unsigned long page;
	unsigned long next_addr;
	unsigned long next_page;

	uint8_t *x;
	unsigned int i;
	unsigned int n;

	BUG_ON(size > sizeof(shadow));

	page = src_addr & PAGE_MASK;
	next_addr = src_addr + size - 1;
	next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		/* Same page */
		x = kmemcheck_shadow_lookup(src_addr);
		if (x) {
			kmemcheck_save_addr(src_addr);
			for (i = 0; i < size; ++i)
				shadow[i] = x[i];
		} else {
			for (i = 0; i < size; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}
	} else {
		n = next_page - src_addr;
		BUG_ON(n > sizeof(shadow));

		/* First page */
		x = kmemcheck_shadow_lookup(src_addr);
		if (x) {
			kmemcheck_save_addr(src_addr);
			for (i = 0; i < n; ++i)
				shadow[i] = x[i];
		} else {
			/* Not tracked */
			for (i = 0; i < n; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}

		/* Second page */
		x = kmemcheck_shadow_lookup(next_page);
		if (x) {
			kmemcheck_save_addr(next_page);
			for (i = n; i < size; ++i)
				shadow[i] = x[i - n];
		} else {
			/* Not tracked */
			for (i = n; i < size; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}
	}

	page = dst_addr & PAGE_MASK;
	next_addr = dst_addr + size - 1;
	next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		/* Same page */
		x = kmemcheck_shadow_lookup(dst_addr);
		if (x) {
			kmemcheck_save_addr(dst_addr);
			for (i = 0; i < size; ++i) {
				x[i] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}
	} else {
		n = next_page - dst_addr;
		BUG_ON(n > sizeof(shadow));

		/* First page */
		x = kmemcheck_shadow_lookup(dst_addr);
		if (x) {
			kmemcheck_save_addr(dst_addr);
			for (i = 0; i < n; ++i) {
				x[i] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}

		/* Second page */
		x = kmemcheck_shadow_lookup(next_page);
		if (x) {
			kmemcheck_save_addr(next_page);
			for (i = n; i < size; ++i) {
				x[i - n] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}
	}

	status = kmemcheck_shadow_test(shadow, size);
	if (status == KMEMCHECK_SHADOW_INITIALIZED)
		return;

	if (kmemcheck_enabled)
		kmemcheck_error_save(status, src_addr, size, regs);

	if (kmemcheck_enabled == 2)
		kmemcheck_enabled = 0;
}

enum kmemcheck_method {
	KMEMCHECK_READ,
	KMEMCHECK_WRITE,
};

static void kmemcheck_access(struct pt_regs *regs,
	unsigned long fallback_address, enum kmemcheck_method fallback_method)
{
	const uint8_t *insn;
	const uint8_t *insn_primary;
	unsigned int size;

	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);

	/* Recursive fault -- ouch. */
	if (data->busy) {
		kmemcheck_show_addr(fallback_address);
		kmemcheck_error_save_bug(regs);
		return;
	}

	data->busy = true;

	insn = (const uint8_t *) regs->ip;
	insn_primary = kmemcheck_opcode_get_primary(insn);

	kmemcheck_opcode_decode(insn, &size);

	switch (insn_primary[0]) {
#ifdef CONFIG_KMEMCHECK_BITOPS_OK
		/* AND, OR, XOR */
		/*
		 * Unfortunately, these instructions have to be excluded from
		 * our regular checking since they access only some (and not
		 * all) bits. This clears out "bogus" bitfield-access warnings.
		 */
	case 0x80:
	case 0x81:
	case 0x82:
	case 0x83:
		switch ((insn_primary[1] >> 3) & 7) {
			/* OR */
		case 1:
			/* AND */
		case 4:
			/* XOR */
		case 6:
			kmemcheck_write(regs, fallback_address, size);
			goto out;

			/* ADD */
		case 0:
			/* ADC */
		case 2:
			/* SBB */
		case 3:
			/* SUB */
		case 5:
			/* CMP */
		case 7:
			break;
		}
		break;
#endif

		/* MOVS, MOVSB, MOVSW, MOVSD */
	case 0xa4:
	case 0xa5:
		/*
		 * These instructions are special because they take two
		 * addresses, but we only get one page fault.
		 */
		kmemcheck_copy(regs, regs->si, regs->di, size);
		goto out;

		/* CMPS, CMPSB, CMPSW, CMPSD */
	case 0xa6:
	case 0xa7:
		kmemcheck_read(regs, regs->si, size);
		kmemcheck_read(regs, regs->di, size);
		goto out;
	}

	/*
	 * If the opcode isn't special in any way, we use the data from the
	 * page fault handler to determine the address and type of memory
	 * access.
	 */
	switch (fallback_method) {
	case KMEMCHECK_READ:
		kmemcheck_read(regs, fallback_address, size);
		goto out;
	case KMEMCHECK_WRITE:
		kmemcheck_write(regs, fallback_address, size);
		goto out;
	}

out:
	data->busy = false;
}
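
/*
 * How the two entry points below were intended to be called (a sketch
 * only; the actual call sites live outside this file, in the x86
 * page-fault and debug-trap code): the kernel's #PF handler offers
 * kernel-space faults to kmemcheck_fault() before treating them as
 * genuine errors, and the #DB handler offers the trap to
 * kmemcheck_trap() so that the page can be hidden again after the
 * single step. Roughly:
 *
 *	if (kmemcheck_fault(regs, address, error_code))
 *		return;		(in the #PF path)
 *
 *	if (kmemcheck_trap(regs))
 *		return;		(in the #DB path)
 */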

bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
	unsigned long error_code)
{
	pte_t *pte;

	/*
	 * XXX: Is it safe to assume that memory accesses from virtual 86
	 * mode or non-kernel code segments will _never_ access kernel
	 * memory (e.g. tracked pages)? For now, we need this to avoid
	 * invoking kmemcheck for PnP BIOS calls.
	 */
	if (regs->flags & X86_VM_MASK)
		return false;
	if (regs->cs != __KERNEL_CS)
		return false;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return false;

	WARN_ON_ONCE(in_nmi());

	/* Bit 1 of the #PF error code is set for write accesses. */
	if (error_code & 2)
		kmemcheck_access(regs, address, KMEMCHECK_WRITE);
	else
		kmemcheck_access(regs, address, KMEMCHECK_READ);

	kmemcheck_show(regs);
	return true;
}

bool kmemcheck_trap(struct pt_regs *regs)
{
	if (!kmemcheck_active(regs))
		return false;

	/* We're done. */
	kmemcheck_hide(regs);
	return true;
}