/**
 * kmemcheck - a heavyweight memory checker for the linux kernel
 * Copyright (C) 2007, 2008  Vegard Nossum <vegardno@ifi.uio.no>
 * (With a lot of help from Ingo Molnar and Pekka Enberg.)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2) as
 * published by the Free Software Foundation.
 */

#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/kernel.h>
#include <linux/kmemcheck.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/page-flags.h>
#include <linux/percpu.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/types.h>

#include <asm/cacheflush.h>
#include <asm/kmemcheck.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

#include "error.h"
#include "opcode.h"
#include "pte.h"
#include "selftest.h"
#include "shadow.h"


#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT
#  define KMEMCHECK_ENABLED 0
#endif

#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT
#  define KMEMCHECK_ENABLED 1
#endif

#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT
#  define KMEMCHECK_ENABLED 2
#endif

int kmemcheck_enabled = KMEMCHECK_ENABLED;

int __init kmemcheck_init(void)
{
#ifdef CONFIG_SMP
	/*
	 * Limit SMP to use a single CPU. We rely on the fact that this code
	 * runs before SMP is set up.
	 */
	if (setup_max_cpus > 1) {
		printk(KERN_INFO
			"kmemcheck: Limiting number of CPUs to 1.\n");
		setup_max_cpus = 1;
	}
#endif

	if (!kmemcheck_selftest()) {
		printk(KERN_INFO "kmemcheck: self-tests failed; disabling\n");
		kmemcheck_enabled = 0;
		return -EINVAL;
	}

	printk(KERN_INFO "kmemcheck: Initialized\n");
	return 0;
}

early_initcall(kmemcheck_init);

/*
 * We need to parse the kmemcheck= option before any memory is allocated.
 */
static int __init param_kmemcheck(char *str)
{
	int val;
	int ret;

	if (!str)
		return -EINVAL;

	ret = kstrtoint(str, 0, &val);
	if (ret)
		return ret;
	kmemcheck_enabled = val;
	return 0;
}

early_param("kmemcheck", param_kmemcheck);

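/*
 * Make the page containing @address present again so that the faulting
 * instruction can execute. Returns 1 if the address belongs to a
 * kmemcheck-tracked page, 0 otherwise.
 */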
int kmemcheck_show_addr(unsigned long address)
{
	pte_t *pte;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return 0;

	set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
	__flush_tlb_one(address);
	return 1;
}

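/*
 * Hide the page again once the faulting instruction has been single-stepped.
 * Returns 1 if the address belongs to a kmemcheck-tracked page, 0 otherwise.
 */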
int kmemcheck_hide_addr(unsigned long address)
{
	pte_t *pte;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return 0;

	set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
	__flush_tlb_one(address);
	return 1;
}

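/*
 * Per-CPU bookkeeping for the fault that is currently being handled.
 */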
struct kmemcheck_context {
	bool busy;
	int balance;

	/*
	 * There can be at most two memory operands to an instruction, but
	 * each address can cross a page boundary -- so we may need up to
	 * four addresses that must be hidden/revealed for each fault.
	 */
	unsigned long addr[4];
	unsigned long n_addrs;
	unsigned long flags;

	/* Data size of the instruction that caused a fault. */
	unsigned int size;
};

static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context);

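/*
 * Returns true if kmemcheck is in the middle of single-stepping a faulting
 * instruction on this CPU, i.e. pages have been shown but not hidden again.
 */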
bool kmemcheck_active(struct pt_regs *regs)
{
	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);

	return data->balance > 0;
}

/* Save an address that needs to be shown/hidden */
static void kmemcheck_save_addr(unsigned long addr)
{
	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);

	BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr));
	data->addr[data->n_addrs++] = addr;
}

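/*
 * Show/hide all the addresses saved for the current fault. The return value
 * is the number of addresses that actually belonged to kmemcheck.
 */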
static unsigned int kmemcheck_show_all(void)
{
	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
	unsigned int i;
	unsigned int n;

	n = 0;
	for (i = 0; i < data->n_addrs; ++i)
		n += kmemcheck_show_addr(data->addr[i]);

	return n;
}

static unsigned int kmemcheck_hide_all(void)
{
	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
	unsigned int i;
	unsigned int n;

	n = 0;
	for (i = 0; i < data->n_addrs; ++i)
		n += kmemcheck_hide_addr(data->addr[i]);

	return n;
}

/*
 * Called from the #PF handler.
 */
void kmemcheck_show(struct pt_regs *regs)
{
	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);

	BUG_ON(!irqs_disabled());

	if (unlikely(data->balance != 0)) {
		kmemcheck_show_all();
		kmemcheck_error_save_bug(regs);
		data->balance = 0;
		return;
	}

	/*
	 * None of the addresses actually belonged to kmemcheck. Note that
	 * this is not an error.
	 */
	if (kmemcheck_show_all() == 0)
		return;

	++data->balance;

	/*
	 * The IF needs to be cleared as well, so that the faulting
	 * instruction can run "uninterrupted". Otherwise, we might take
	 * an interrupt and start executing that before we've had a chance
	 * to hide the page again.
	 *
	 * NOTE: In the rare case of multiple faults, we must not override
	 * the original flags:
	 */
	if (!(regs->flags & X86_EFLAGS_TF))
		data->flags = regs->flags;

	regs->flags |= X86_EFLAGS_TF;
	regs->flags &= ~X86_EFLAGS_IF;
}

/*
 * Called from the #DB handler.
 */
void kmemcheck_hide(struct pt_regs *regs)
{
	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);
	int n;

	BUG_ON(!irqs_disabled());

	if (unlikely(data->balance != 1)) {
		kmemcheck_show_all();
		kmemcheck_error_save_bug(regs);
		data->n_addrs = 0;
		data->balance = 0;

		if (!(data->flags & X86_EFLAGS_TF))
			regs->flags &= ~X86_EFLAGS_TF;
		if (data->flags & X86_EFLAGS_IF)
			regs->flags |= X86_EFLAGS_IF;
		return;
	}

	if (kmemcheck_enabled)
		n = kmemcheck_hide_all();
	else
		n = kmemcheck_show_all();

	if (n == 0)
		return;

	--data->balance;

	data->n_addrs = 0;

	if (!(data->flags & X86_EFLAGS_TF))
		regs->flags &= ~X86_EFLAGS_TF;
	if (data->flags & X86_EFLAGS_IF)
		regs->flags |= X86_EFLAGS_IF;
}

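/*
 * Make @n pages starting at @p present again and clear their "hidden" flag.
 */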
void kmemcheck_show_pages(struct page *p, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; ++i) {
		unsigned long address;
		pte_t *pte;
		unsigned int level;

		address = (unsigned long) page_address(&p[i]);
		pte = lookup_address(address, &level);
		BUG_ON(!pte);
		BUG_ON(level != PG_LEVEL_4K);

		set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
		set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN));
		__flush_tlb_one(address);
	}
}

bool kmemcheck_page_is_tracked(struct page *p)
{
	/* This will also check the "hidden" flag of the PTE. */
	return kmemcheck_pte_lookup((unsigned long) page_address(p));
}

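/*
 * Mark @n pages starting at @p as not present and set their "hidden" flag,
 * so that any access to them faults into kmemcheck.
 */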
void kmemcheck_hide_pages(struct page *p, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; ++i) {
		unsigned long address;
		pte_t *pte;
		unsigned int level;

		address = (unsigned long) page_address(&p[i]);
		pte = lookup_address(address, &level);
		BUG_ON(!pte);
		BUG_ON(level != PG_LEVEL_4K);

		set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
		set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN));
		__flush_tlb_one(address);
	}
}

/* Access may NOT cross page boundary */
static void kmemcheck_read_strict(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	void *shadow;
	enum kmemcheck_shadow status;

	shadow = kmemcheck_shadow_lookup(addr);
	if (!shadow)
		return;

	kmemcheck_save_addr(addr);
	status = kmemcheck_shadow_test(shadow, size);
	if (status == KMEMCHECK_SHADOW_INITIALIZED)
		return;

	if (kmemcheck_enabled)
		kmemcheck_error_save(status, addr, size, regs);

	/* In one-shot mode, disable reporting after the first error. */
	if (kmemcheck_enabled == 2)
		kmemcheck_enabled = 0;

	/* Don't warn about it again. */
	kmemcheck_shadow_set(shadow, size);
}

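/*
 * Returns true if the whole object at @addr is initialized according to its
 * shadow memory. Memory that is not tracked by kmemcheck is considered
 * initialized.
 */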
bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
{
	enum kmemcheck_shadow status;
	void *shadow;

	shadow = kmemcheck_shadow_lookup(addr);
	if (!shadow)
		return true;

	status = kmemcheck_shadow_test_all(shadow, size);

	return status == KMEMCHECK_SHADOW_INITIALIZED;
}

/* Access may cross page boundary */
static void kmemcheck_read(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	unsigned long page = addr & PAGE_MASK;
	unsigned long next_addr = addr + size - 1;
	unsigned long next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		kmemcheck_read_strict(regs, addr, size);
		return;
	}

	/*
	 * What we do is basically to split the access across the
	 * two pages and handle each part separately. Yes, this means
	 * that we may now see reads that are 3 + 5 bytes, for
	 * example (and if both are uninitialized, there will be two
	 * reports), but it makes the code a lot simpler.
	 */
	kmemcheck_read_strict(regs, addr, next_page - addr);
	kmemcheck_read_strict(regs, next_page, next_addr - next_page);
}

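/* Access may NOT cross page boundary */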
static void kmemcheck_write_strict(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	void *shadow;

	shadow = kmemcheck_shadow_lookup(addr);
	if (!shadow)
		return;

	kmemcheck_save_addr(addr);
	kmemcheck_shadow_set(shadow, size);
}

static void kmemcheck_write(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	unsigned long page = addr & PAGE_MASK;
	unsigned long next_addr = addr + size - 1;
	unsigned long next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		kmemcheck_write_strict(regs, addr, size);
		return;
	}

	/* See comment in kmemcheck_read(). */
	kmemcheck_write_strict(regs, addr, next_page - addr);
	kmemcheck_write_strict(regs, next_page, next_addr - next_page);
}

/*
 * Copying is hard. We have two addresses, each of which may be split across
 * a page (and each page will have different shadow addresses).
 */
static void kmemcheck_copy(struct pt_regs *regs,
	unsigned long src_addr, unsigned long dst_addr, unsigned int size)
{
	uint8_t shadow[8];
	enum kmemcheck_shadow status;

	unsigned long page;
	unsigned long next_addr;
	unsigned long next_page;

	uint8_t *x;
	unsigned int i;
	unsigned int n;

	BUG_ON(size > sizeof(shadow));

	page = src_addr & PAGE_MASK;
	next_addr = src_addr + size - 1;
	next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		/* Same page */
		x = kmemcheck_shadow_lookup(src_addr);
		if (x) {
			kmemcheck_save_addr(src_addr);
			for (i = 0; i < size; ++i)
				shadow[i] = x[i];
		} else {
			for (i = 0; i < size; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}
	} else {
		n = next_page - src_addr;
		BUG_ON(n > sizeof(shadow));

		/* First page */
		x = kmemcheck_shadow_lookup(src_addr);
		if (x) {
			kmemcheck_save_addr(src_addr);
			for (i = 0; i < n; ++i)
				shadow[i] = x[i];
		} else {
			/* Not tracked */
			for (i = 0; i < n; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}

		/* Second page */
		x = kmemcheck_shadow_lookup(next_page);
		if (x) {
			kmemcheck_save_addr(next_page);
			for (i = n; i < size; ++i)
				shadow[i] = x[i - n];
		} else {
			/* Not tracked */
			for (i = n; i < size; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}
	}

	page = dst_addr & PAGE_MASK;
	next_addr = dst_addr + size - 1;
	next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		/* Same page */
		x = kmemcheck_shadow_lookup(dst_addr);
		if (x) {
			kmemcheck_save_addr(dst_addr);
			for (i = 0; i < size; ++i) {
				x[i] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}
	} else {
		n = next_page - dst_addr;
		BUG_ON(n > sizeof(shadow));

		/* First page */
		x = kmemcheck_shadow_lookup(dst_addr);
		if (x) {
			kmemcheck_save_addr(dst_addr);
			for (i = 0; i < n; ++i) {
				x[i] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}

		/* Second page */
		x = kmemcheck_shadow_lookup(next_page);
		if (x) {
			kmemcheck_save_addr(next_page);
			for (i = n; i < size; ++i) {
				x[i - n] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}
	}

	status = kmemcheck_shadow_test(shadow, size);
	if (status == KMEMCHECK_SHADOW_INITIALIZED)
		return;

	if (kmemcheck_enabled)
		kmemcheck_error_save(status, src_addr, size, regs);

	if (kmemcheck_enabled == 2)
		kmemcheck_enabled = 0;
}

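/* The type of access, as determined from the page fault error code. */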
enum kmemcheck_method {
	KMEMCHECK_READ,
	KMEMCHECK_WRITE,
};

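/*
 * Decode the instruction at regs->ip to find out which memory addresses it
 * touches and how many bytes it accesses. Opcodes that are not handled
 * specially fall back to the address and access type reported by the page
 * fault handler.
 */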
static void kmemcheck_access(struct pt_regs *regs,
	unsigned long fallback_address, enum kmemcheck_method fallback_method)
{
	const uint8_t *insn;
	const uint8_t *insn_primary;
	unsigned int size;

	struct kmemcheck_context *data = this_cpu_ptr(&kmemcheck_context);

	/* Recursive fault -- ouch. */
	if (data->busy) {
		kmemcheck_show_addr(fallback_address);
		kmemcheck_error_save_bug(regs);
		return;
	}

	data->busy = true;

	insn = (const uint8_t *) regs->ip;
	insn_primary = kmemcheck_opcode_get_primary(insn);

	kmemcheck_opcode_decode(insn, &size);

	switch (insn_primary[0]) {
#ifdef CONFIG_KMEMCHECK_BITOPS_OK
		/* AND, OR, XOR */
		/*
		 * Unfortunately, these instructions have to be excluded from
		 * our regular checking since they access only some (and not
		 * all) bits. This clears out "bogus" bitfield-access warnings.
		 */
	case 0x80:
	case 0x81:
	case 0x82:
	case 0x83:
		switch ((insn_primary[1] >> 3) & 7) {
			/* OR */
		case 1:
			/* AND */
		case 4:
			/* XOR */
		case 6:
			kmemcheck_write(regs, fallback_address, size);
			goto out;

			/* ADD */
		case 0:
			/* ADC */
		case 2:
			/* SBB */
		case 3:
			/* SUB */
		case 5:
			/* CMP */
		case 7:
			break;
		}
		break;
#endif

		/* MOVS, MOVSB, MOVSW, MOVSD */
	case 0xa4:
	case 0xa5:
		/*
		 * These instructions are special because they take two
		 * addresses, but we only get one page fault.
		 */
		kmemcheck_copy(regs, regs->si, regs->di, size);
		goto out;

		/* CMPS, CMPSB, CMPSW, CMPSD */
	case 0xa6:
	case 0xa7:
		kmemcheck_read(regs, regs->si, size);
		kmemcheck_read(regs, regs->di, size);
		goto out;
	}

	/*
	 * If the opcode isn't special in any way, we use the data from the
	 * page fault handler to determine the address and type of memory
	 * access.
	 */
	switch (fallback_method) {
	case KMEMCHECK_READ:
		kmemcheck_read(regs, fallback_address, size);
		goto out;
	case KMEMCHECK_WRITE:
		kmemcheck_write(regs, fallback_address, size);
		goto out;
	}

out:
	data->busy = false;
}

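/*
 * Called from the page fault handler. Returns true if the fault hit a
 * kmemcheck-tracked page and has been handled here.
 */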
bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
	unsigned long error_code)
{
	pte_t *pte;

	/*
	 * XXX: Is it safe to assume that memory accesses from virtual 86
	 * mode or non-kernel code segments will _never_ access kernel
	 * memory (e.g. tracked pages)? For now, we need this to avoid
	 * invoking kmemcheck for PnP BIOS calls.
	 */
	if (regs->flags & X86_VM_MASK)
		return false;
	if (regs->cs != __KERNEL_CS)
		return false;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return false;

	WARN_ON_ONCE(in_nmi());

	/* Bit 1 of the error code is set for write accesses. */
	if (error_code & 2)
		kmemcheck_access(regs, address, KMEMCHECK_WRITE);
	else
		kmemcheck_access(regs, address, KMEMCHECK_READ);

	kmemcheck_show(regs);
	return true;
}

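/*
 * Called from the debug exception (single-step) handler. Returns true if the
 * trap was caused by kmemcheck single-stepping a faulting instruction.
 */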
bool kmemcheck_trap(struct pt_regs *regs)
{
	if (!kmemcheck_active(regs))
		return false;

	/* We're done. */
	kmemcheck_hide(regs);
	return true;
}