1/*
2 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 * Licensed under the GPL
4 */
5
6#include <linux/mm.h>
7#include <linux/sched.h>
8#include <linux/hardirq.h>
9#include <linux/module.h>
10#include <asm/current.h>
11#include <asm/pgtable.h>
12#include <asm/tlbflush.h>
13#include <arch.h>
14#include <as-layout.h>
15#include <kern_util.h>
16#include <os.h>
17#include <skas.h>
18
19/*
20 * Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by
21 * segv().
22 */
23int handle_page_fault(unsigned long address, unsigned long ip,
24		      int is_write, int is_user, int *code_out)
25{
26	struct mm_struct *mm = current->mm;
27	struct vm_area_struct *vma;
28	pgd_t *pgd;
29	pud_t *pud;
30	pmd_t *pmd;
31	pte_t *pte;
32	int err = -EFAULT;
33	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
34
35	*code_out = SEGV_MAPERR;
36
37	/*
38	 * If the fault was during atomic operation, don't take the fault, just
39	 * fail.
40	 */
41	if (in_atomic())
42		goto out_nosemaphore;
43
44	if (is_user)
45		flags |= FAULT_FLAG_USER;
46retry:
47	down_read(&mm->mmap_sem);
48	vma = find_vma(mm, address);
49	if (!vma)
50		goto out;
51	else if (vma->vm_start <= address)
52		goto good_area;
53	else if (!(vma->vm_flags & VM_GROWSDOWN))
54		goto out;
55	else if (is_user && !ARCH_IS_STACKGROW(address))
56		goto out;
57	else if (expand_stack(vma, address))
58		goto out;
59
60good_area:
61	*code_out = SEGV_ACCERR;
62	if (is_write) {
63		if (!(vma->vm_flags & VM_WRITE))
64			goto out;
65		flags |= FAULT_FLAG_WRITE;
66	} else {
67		/* Don't require VM_READ|VM_EXEC for write faults! */
68		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
69			goto out;
70	}
71
72	do {
73		int fault;
74
75		fault = handle_mm_fault(mm, vma, address, flags);
76
77		if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
78			goto out_nosemaphore;
79
80		if (unlikely(fault & VM_FAULT_ERROR)) {
81			if (fault & VM_FAULT_OOM) {
82				goto out_of_memory;
83			} else if (fault & VM_FAULT_SIGSEGV) {
84				goto out;
85			} else if (fault & VM_FAULT_SIGBUS) {
86				err = -EACCES;
87				goto out;
88			}
89			BUG();
90		}
91		if (flags & FAULT_FLAG_ALLOW_RETRY) {
92			if (fault & VM_FAULT_MAJOR)
93				current->maj_flt++;
94			else
95				current->min_flt++;
96			if (fault & VM_FAULT_RETRY) {
97				flags &= ~FAULT_FLAG_ALLOW_RETRY;
98				flags |= FAULT_FLAG_TRIED;
99
100				goto retry;
101			}
102		}
103
104		pgd = pgd_offset(mm, address);
105		pud = pud_offset(pgd, address);
106		pmd = pmd_offset(pud, address);
107		pte = pte_offset_kernel(pmd, address);
108	} while (!pte_present(*pte));
109	err = 0;
110	/*
111	 * The below warning was added in place of
112	 *	pte_mkyoung(); if (is_write) pte_mkdirty();
113	 * If it's triggered, we'd see normally a hang here (a clean pte is
114	 * marked read-only to emulate the dirty bit).
115	 * However, the generic code can mark a PTE writable but clean on a
116	 * concurrent read fault, triggering this harmlessly. So comment it out.
117	 */
118#if 0
119	WARN_ON(!pte_young(*pte) || (is_write && !pte_dirty(*pte)));
120#endif
121	flush_tlb_page(vma, address);
122out:
123	up_read(&mm->mmap_sem);
124out_nosemaphore:
125	return err;
126
127out_of_memory:
128	/*
129	 * We ran out of memory, call the OOM killer, and return the userspace
130	 * (which will retry the fault, or kill us if we got oom-killed).
131	 */
132	up_read(&mm->mmap_sem);
133	if (!is_user)
134		goto out_nosemaphore;
135	pagefault_out_of_memory();
136	return 0;
137}
138EXPORT_SYMBOL(handle_page_fault);
139
140static void show_segv_info(struct uml_pt_regs *regs)
141{
142	struct task_struct *tsk = current;
143	struct faultinfo *fi = UPT_FAULTINFO(regs);
144
145	if (!unhandled_signal(tsk, SIGSEGV))
146		return;
147
148	if (!printk_ratelimit())
149		return;
150
151	printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x",
152		task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
153		tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi),
154		(void *)UPT_IP(regs), (void *)UPT_SP(regs),
155		fi->error_code);
156
157	print_vma_addr(KERN_CONT " in ", UPT_IP(regs));
158	printk(KERN_CONT "\n");
159}
160
161static void bad_segv(struct faultinfo fi, unsigned long ip)
162{
163	struct siginfo si;
164
165	si.si_signo = SIGSEGV;
166	si.si_code = SEGV_ACCERR;
167	si.si_addr = (void __user *) FAULT_ADDRESS(fi);
168	current->thread.arch.faultinfo = fi;
169	force_sig_info(SIGSEGV, &si, current);
170}
171
/*
 * Force a SIGSEGV on the current task via force_sigsegv() and process
 * pending signals right away with do_signal().
 */
void fatal_sigsegv(void)
{
	force_sigsegv(SIGSEGV, current);
	do_signal();
	/*
	 * This is to tell gcc that we're not returning.  do_signal() can, in
	 * general, return, but it is not expected to here because a fatal
	 * SIGSEGV has just been queued.
	 */
	os_dump_core();
}
183
184void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
185{
186	struct faultinfo * fi = UPT_FAULTINFO(regs);
187
188	if (UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)) {
189		show_segv_info(regs);
190		bad_segv(*fi, UPT_IP(regs));
191		return;
192	}
193	segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs);
194}
195
196/*
197 * We give a *copy* of the faultinfo in the regs to segv.
198 * This must be done, since nesting SEGVs could overwrite
199 * the info in the regs. A pointer to the info then would
200 * give us bad data!
201 */
202unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
203		   struct uml_pt_regs *regs)
204{
205	struct siginfo si;
206	jmp_buf *catcher;
207	int err;
208	int is_write = FAULT_WRITE(fi);
209	unsigned long address = FAULT_ADDRESS(fi);
210
211	if (!is_user && regs)
212		current->thread.segv_regs = container_of(regs, struct pt_regs, regs);
213
214	if (!is_user && (address >= start_vm) && (address < end_vm)) {
215		flush_tlb_kernel_vm();
216		goto out;
217	}
218	else if (current->mm == NULL) {
219		show_regs(container_of(regs, struct pt_regs, regs));
220		panic("Segfault with no mm");
221	}
222
223	if (SEGV_IS_FIXABLE(&fi))
224		err = handle_page_fault(address, ip, is_write, is_user,
225					&si.si_code);
226	else {
227		err = -EFAULT;
228		/*
229		 * A thread accessed NULL, we get a fault, but CR2 is invalid.
230		 * This code is used in __do_copy_from_user() of TT mode.
231		 * XXX tt mode is gone, so maybe this isn't needed any more
232		 */
233		address = 0;
234	}
235
236	catcher = current->thread.fault_catcher;
237	if (!err)
238		goto out;
239	else if (catcher != NULL) {
240		current->thread.fault_addr = (void *) address;
241		UML_LONGJMP(catcher, 1);
242	}
243	else if (current->thread.fault_addr != NULL)
244		panic("fault_addr set but no fault catcher");
245	else if (!is_user && arch_fixup(ip, regs))
246		goto out;
247
248	if (!is_user) {
249		show_regs(container_of(regs, struct pt_regs, regs));
250		panic("Kernel mode fault at addr 0x%lx, ip 0x%lx",
251		      address, ip);
252	}
253
254	show_segv_info(regs);
255
256	if (err == -EACCES) {
257		si.si_signo = SIGBUS;
258		si.si_errno = 0;
259		si.si_code = BUS_ADRERR;
260		si.si_addr = (void __user *)address;
261		current->thread.arch.faultinfo = fi;
262		force_sig_info(SIGBUS, &si, current);
263	} else {
264		BUG_ON(err != -EFAULT);
265		si.si_signo = SIGSEGV;
266		si.si_addr = (void __user *) address;
267		current->thread.arch.faultinfo = fi;
268		force_sig_info(SIGSEGV, &si, current);
269	}
270
271out:
272	if (regs)
273		current->thread.segv_regs = NULL;
274
275	return 0;
276}
277
278void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs)
279{
280	struct faultinfo *fi;
281	struct siginfo clean_si;
282
283	if (!UPT_IS_USER(regs)) {
284		if (sig == SIGBUS)
285			printk(KERN_ERR "Bus error - the host /dev/shm or /tmp "
286			       "mount likely just ran out of space\n");
287		panic("Kernel mode signal %d", sig);
288	}
289
290	arch_examine_signal(sig, regs);
291
292	memset(&clean_si, 0, sizeof(clean_si));
293	clean_si.si_signo = si->si_signo;
294	clean_si.si_errno = si->si_errno;
295	clean_si.si_code = si->si_code;
296	switch (sig) {
297	case SIGILL:
298	case SIGFPE:
299	case SIGSEGV:
300	case SIGBUS:
301	case SIGTRAP:
302		fi = UPT_FAULTINFO(regs);
303		clean_si.si_addr = (void __user *) FAULT_ADDRESS(*fi);
304		current->thread.arch.faultinfo = *fi;
305#ifdef __ARCH_SI_TRAPNO
306		clean_si.si_trapno = si->si_trapno;
307#endif
308		break;
309	default:
310		printk(KERN_ERR "Attempted to relay unknown signal %d (si_code = %d)\n",
311			sig, si->si_code);
312	}
313
314	force_sig_info(sig, &clean_si, current);
315}
316
317void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs)
318{
319	if (current->thread.fault_catcher != NULL)
320		UML_LONGJMP(current->thread.fault_catcher, 1);
321	else
322		relay_signal(sig, si, regs);
323}
324
/*
 * Signal handler that forwards to do_IRQ() as WINCH_IRQ with the given
 * register state.  @sig and @unused_si are not used.
 */
void winch(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
	do_IRQ(WINCH_IRQ, regs);
}
329
/* Intentionally empty: this file performs no trap setup here. */
void trap_init(void)
{
}
333