1/*
2 * Copyright (C) 2009 Wind River Systems Inc
3 *   Implemented by fredrik.markstrom@gmail.com and ivarholmqvist@gmail.com
4 *
5 * based on arch/mips/mm/fault.c which is:
6 *
7 * Copyright (C) 1995-2000 Ralf Baechle
8 *
9 * This file is subject to the terms and conditions of the GNU General Public
10 * License.  See the file "COPYING" in the main directory of this archive
11 * for more details.
12 */
13
14#include <linux/signal.h>
15#include <linux/sched.h>
16#include <linux/interrupt.h>
17#include <linux/kernel.h>
18#include <linux/errno.h>
19#include <linux/string.h>
20#include <linux/types.h>
21#include <linux/ptrace.h>
22#include <linux/mman.h>
23#include <linux/mm.h>
24#include <linux/module.h>
25#include <linux/uaccess.h>
26#include <linux/ptrace.h>
27
28#include <asm/mmu_context.h>
29#include <asm/traps.h>
30
/*
 * Exception cause codes. do_page_fault() compares these against its
 * 'cause' argument after shifting that argument right by two bits.
 */
#define EXC_SUPERV_INSN_ACCESS	9  /* Supervisor only instruction address */
#define EXC_SUPERV_DATA_ACCESS	11 /* Supervisor only data address */
#define EXC_X_PROTECTION_FAULT	13 /* TLB permission violation (x) */
#define EXC_R_PROTECTION_FAULT	14 /* TLB permission violation (r) */
#define EXC_W_PROTECTION_FAULT	15 /* TLB permission violation (w) */
36
37/*
38 * This routine handles page faults.  It determines the address,
39 * and the problem, and then passes it off to one of the appropriate
40 * routines.
41 */
42asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause,
43				unsigned long address)
44{
45	struct vm_area_struct *vma = NULL;
46	struct task_struct *tsk = current;
47	struct mm_struct *mm = tsk->mm;
48	int code = SEGV_MAPERR;
49	int fault;
50	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
51
52	cause >>= 2;
53
54	/* Restart the instruction */
55	regs->ea -= 4;
56
57	/*
58	 * We fault-in kernel-space virtual memory on-demand. The
59	 * 'reference' page table is init_mm.pgd.
60	 *
61	 * NOTE! We MUST NOT take any locks for this case. We may
62	 * be in an interrupt or a critical region, and should
63	 * only copy the information from the master page table,
64	 * nothing more.
65	 */
66	if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END)) {
67		if (user_mode(regs))
68			goto bad_area_nosemaphore;
69		else
70			goto vmalloc_fault;
71	}
72
73	if (unlikely(address >= TASK_SIZE))
74		goto bad_area_nosemaphore;
75
76	/*
77	 * If we're in an interrupt or have no user
78	 * context, we must not take the fault..
79	 */
80	if (in_atomic() || !mm)
81		goto bad_area_nosemaphore;
82
83	if (user_mode(regs))
84		flags |= FAULT_FLAG_USER;
85
86	if (!down_read_trylock(&mm->mmap_sem)) {
87		if (!user_mode(regs) && !search_exception_tables(regs->ea))
88			goto bad_area_nosemaphore;
89retry:
90		down_read(&mm->mmap_sem);
91	}
92
93	vma = find_vma(mm, address);
94	if (!vma)
95		goto bad_area;
96	if (vma->vm_start <= address)
97		goto good_area;
98	if (!(vma->vm_flags & VM_GROWSDOWN))
99		goto bad_area;
100	if (expand_stack(vma, address))
101		goto bad_area;
102/*
103 * Ok, we have a good vm_area for this memory access, so
104 * we can handle it..
105 */
106good_area:
107	code = SEGV_ACCERR;
108
109	switch (cause) {
110	case EXC_SUPERV_INSN_ACCESS:
111		goto bad_area;
112	case EXC_SUPERV_DATA_ACCESS:
113		goto bad_area;
114	case EXC_X_PROTECTION_FAULT:
115		if (!(vma->vm_flags & VM_EXEC))
116			goto bad_area;
117		break;
118	case EXC_R_PROTECTION_FAULT:
119		if (!(vma->vm_flags & VM_READ))
120			goto bad_area;
121		break;
122	case EXC_W_PROTECTION_FAULT:
123		if (!(vma->vm_flags & VM_WRITE))
124			goto bad_area;
125		flags = FAULT_FLAG_WRITE;
126		break;
127	}
128
129	/*
130	 * If for any reason at all we couldn't handle the fault,
131	 * make sure we exit gracefully rather than endlessly redo
132	 * the fault.
133	 */
134	fault = handle_mm_fault(mm, vma, address, flags);
135
136	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
137		return;
138
139	if (unlikely(fault & VM_FAULT_ERROR)) {
140		if (fault & VM_FAULT_OOM)
141			goto out_of_memory;
142		else if (fault & VM_FAULT_SIGSEGV)
143			goto bad_area;
144		else if (fault & VM_FAULT_SIGBUS)
145			goto do_sigbus;
146		BUG();
147	}
148
149	/*
150	 * Major/minor page fault accounting is only done on the
151	 * initial attempt. If we go through a retry, it is extremely
152	 * likely that the page will be found in page cache at that point.
153	 */
154	if (flags & FAULT_FLAG_ALLOW_RETRY) {
155		if (fault & VM_FAULT_MAJOR)
156			current->maj_flt++;
157		else
158			current->min_flt++;
159		if (fault & VM_FAULT_RETRY) {
160			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
161			 * of starvation. */
162			flags &= ~FAULT_FLAG_ALLOW_RETRY;
163			flags |= FAULT_FLAG_TRIED;
164
165			/*
166			 * No need to up_read(&mm->mmap_sem) as we would
167			 * have already released it in __lock_page_or_retry
168			 * in mm/filemap.c.
169			 */
170
171			goto retry;
172		}
173	}
174
175	up_read(&mm->mmap_sem);
176	return;
177
178/*
179 * Something tried to access memory that isn't in our memory map..
180 * Fix it, but check if it's kernel or user first..
181 */
182bad_area:
183	up_read(&mm->mmap_sem);
184
185bad_area_nosemaphore:
186	/* User mode accesses just cause a SIGSEGV */
187	if (user_mode(regs)) {
188		if (unhandled_signal(current, SIGSEGV) && printk_ratelimit()) {
189			pr_info("%s: unhandled page fault (%d) at 0x%08lx, "
190				"cause %ld\n", current->comm, SIGSEGV, address, cause);
191			show_regs(regs);
192		}
193		_exception(SIGSEGV, regs, code, address);
194		return;
195	}
196
197no_context:
198	/* Are we prepared to handle this kernel fault? */
199	if (fixup_exception(regs))
200		return;
201
202	/*
203	 * Oops. The kernel tried to access some bad page. We'll have to
204	 * terminate things with extreme prejudice.
205	 */
206	bust_spinlocks(1);
207
208	pr_alert("Unable to handle kernel %s at virtual address %08lx",
209		address < PAGE_SIZE ? "NULL pointer dereference" :
210		"paging request", address);
211	pr_alert("ea = %08lx, ra = %08lx, cause = %ld\n", regs->ea, regs->ra,
212		cause);
213	panic("Oops");
214	return;
215
216/*
217 * We ran out of memory, or some other thing happened to us that made
218 * us unable to handle the page fault gracefully.
219 */
220out_of_memory:
221	up_read(&mm->mmap_sem);
222	if (!user_mode(regs))
223		goto no_context;
224	pagefault_out_of_memory();
225	return;
226
227do_sigbus:
228	up_read(&mm->mmap_sem);
229
230	/* Kernel mode? Handle exceptions or die */
231	if (!user_mode(regs))
232		goto no_context;
233
234	_exception(SIGBUS, regs, BUS_ADRERR, address);
235	return;
236
237vmalloc_fault:
238	{
239		/*
240		 * Synchronize this task's top level page-table
241		 * with the 'reference' page table.
242		 *
243		 * Do _not_ use "tsk" here. We might be inside
244		 * an interrupt in the middle of a task switch..
245		 */
246		int offset = pgd_index(address);
247		pgd_t *pgd, *pgd_k;
248		pud_t *pud, *pud_k;
249		pmd_t *pmd, *pmd_k;
250		pte_t *pte_k;
251
252		pgd = pgd_current + offset;
253		pgd_k = init_mm.pgd + offset;
254
255		if (!pgd_present(*pgd_k))
256			goto no_context;
257		set_pgd(pgd, *pgd_k);
258
259		pud = pud_offset(pgd, address);
260		pud_k = pud_offset(pgd_k, address);
261		if (!pud_present(*pud_k))
262			goto no_context;
263		pmd = pmd_offset(pud, address);
264		pmd_k = pmd_offset(pud_k, address);
265		if (!pmd_present(*pmd_k))
266			goto no_context;
267		set_pmd(pmd, *pmd_k);
268
269		pte_k = pte_offset_kernel(pmd_k, address);
270		if (!pte_present(*pte_k))
271			goto no_context;
272
273		flush_tlb_one(address);
274		return;
275	}
276}
277