/*
 * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Licensed under the GPL
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <as-layout.h>
#include <mem_user.h>
#include <os.h>
#include <skas.h>
#include <kern_util.h>

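/*
 * A host_vm_change batches pending host mmap/munmap/mprotect requests
 * so that runs of adjacent pages can be merged and issued together by
 * do_ops() instead of one host call per page.
 */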
struct host_vm_change {
	struct host_vm_op {
		enum { NONE, MMAP, MUNMAP, MPROTECT } type;
		union {
			struct {
				unsigned long addr;
				unsigned long len;
				unsigned int prot;
				int fd;
				__u64 offset;
			} mmap;
			struct {
				unsigned long addr;
				unsigned long len;
			} munmap;
			struct {
				unsigned long addr;
				unsigned long len;
				unsigned int prot;
			} mprotect;
		} u;
	} ops[1];
	int index;
	struct mm_id *id;
	void *data;
	int force;
};

#define INIT_HVC(mm, force) \
	((struct host_vm_change) \
	 { .ops		= { { .type = NONE } },	\
	   .id		= &mm->context.id, \
	   .data	= NULL, \
	   .index	= 0, \
	   .force	= force })

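/*
 * Replay the first "end" queued operations against the host address
 * space.  map(), unmap() and protect() live in the os layer; "finished"
 * tells them this is the last batch, so any state they have accumulated
 * in hvc->data can be flushed.
 */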
static int do_ops(struct host_vm_change *hvc, int end,
		  int finished)
{
	struct host_vm_op *op;
	int i, ret = 0;

	for (i = 0; i < end && !ret; i++) {
		op = &hvc->ops[i];
		switch (op->type) {
		case MMAP:
			ret = map(hvc->id, op->u.mmap.addr, op->u.mmap.len,
				  op->u.mmap.prot, op->u.mmap.fd,
				  op->u.mmap.offset, finished, &hvc->data);
			break;
		case MUNMAP:
			ret = unmap(hvc->id, op->u.munmap.addr,
				    op->u.munmap.len, finished, &hvc->data);
			break;
		case MPROTECT:
			ret = protect(hvc->id, op->u.mprotect.addr,
				      op->u.mprotect.len, op->u.mprotect.prot,
				      finished, &hvc->data);
			break;
		default:
			printk(KERN_ERR "Unknown op type %d in do_ops\n",
			       op->type);
			BUG();
			break;
		}
	}

	return ret;
}

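/*
 * Queue a host mmap of len bytes at virt, backed by the physical page
 * at phys.  If the request directly extends the previous MMAP op (same
 * fd and prot, contiguous in both address and file offset), the two are
 * merged into one larger op rather than taking a new slot.  A full
 * queue is flushed with do_ops() first.
 */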
static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
		    unsigned int prot, struct host_vm_change *hvc)
{
	__u64 offset;
	struct host_vm_op *last;
	int fd, ret = 0;

	fd = phys_mapping(phys, &offset);
	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
		if ((last->type == MMAP) &&
		   (last->u.mmap.addr + last->u.mmap.len == virt) &&
		   (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
		   (last->u.mmap.offset + last->u.mmap.len == offset)) {
			last->u.mmap.len += len;
			return 0;
		}
	}

	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
	}

	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MMAP,
				    .u = { .mmap = { .addr	= virt,
						     .len	= len,
						     .prot	= prot,
						     .fd	= fd,
						     .offset	= offset }
			   } });
	return ret;
}

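/*
 * Queue a host munmap, merging with a directly preceding, contiguous
 * MUNMAP op where possible.  The stub pages carry the kernel's
 * communication channel with the process, so unmapping anything in
 * [STUB_START, STUB_END) is refused.
 */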
static int add_munmap(unsigned long addr, unsigned long len,
		      struct host_vm_change *hvc)
{
	struct host_vm_op *last;
	int ret = 0;

	if ((addr >= STUB_START) && (addr < STUB_END))
		return -EINVAL;

	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
		if ((last->type == MUNMAP) &&
		   (last->u.munmap.addr + last->u.munmap.len == addr)) {
			last->u.munmap.len += len;
			return 0;
		}
	}

	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
	}

	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MUNMAP,
				    .u = { .munmap = { .addr	= addr,
						       .len	= len } } });
	return ret;
}

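/*
 * Queue a host mprotect, merging with a contiguous preceding MPROTECT
 * op that requests the same protection.
 */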
static int add_mprotect(unsigned long addr, unsigned long len,
			unsigned int prot, struct host_vm_change *hvc)
{
	struct host_vm_op *last;
	int ret = 0;

	if (hvc->index != 0) {
		last = &hvc->ops[hvc->index - 1];
		if ((last->type == MPROTECT) &&
		   (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
		   (last->u.mprotect.prot == prot)) {
			last->u.mprotect.len += len;
			return 0;
		}
	}

	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
		hvc->index = 0;
	}

	hvc->ops[hvc->index++] = ((struct host_vm_op)
				  { .type	= MPROTECT,
				    .u = { .mprotect = { .addr	= addr,
							 .len	= len,
							 .prot	= prot } } });
	return ret;
}

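/*
 * Round n up to the next multiple of inc, which must be a power of two;
 * an already aligned n advances to the following boundary.
 */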
#define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))

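/*
 * Walk the ptes under one pmd and queue the host operation that brings
 * each page up to date: a new present page becomes an mmap, a new
 * absent one a munmap, and a pure protection change an mprotect.
 * Clearing read/write permission for non-young/non-dirty ptes makes
 * the host fault on the next access, which is how the accessed and
 * dirty bits get set.
 */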
static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pte_t *pte;
	int r, w, x, prot, ret = 0;

	pte = pte_offset_kernel(pmd, addr);
	do {
		if ((addr >= STUB_START) && (addr < STUB_END))
			continue;

		r = pte_read(*pte);
		w = pte_write(*pte);
		x = pte_exec(*pte);
		if (!pte_young(*pte)) {
			r = 0;
			w = 0;
		} else if (!pte_dirty(*pte))
			w = 0;

		prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
			(x ? UM_PROT_EXEC : 0));
		if (hvc->force || pte_newpage(*pte)) {
			if (pte_present(*pte))
				ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
					       PAGE_SIZE, prot, hvc);
			else
				ret = add_munmap(addr, PAGE_SIZE, hvc);
		} else if (pte_newprot(*pte))
			ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
		*pte = pte_mkuptodate(*pte);
	} while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
	return ret;
}

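/*
 * update_pmd_range() and update_pud_range() walk the intermediate page
 * table levels; a non-present entry that is marked new (or a forced
 * flush) becomes a single munmap covering the whole span.
 */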
static inline int update_pmd_range(pud_t *pud, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pmd_t *pmd;
	unsigned long next;
	int ret = 0;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (!pmd_present(*pmd)) {
			if (hvc->force || pmd_newpage(*pmd)) {
				ret = add_munmap(addr, next - addr, hvc);
				pmd_mkuptodate(*pmd);
			}
		}
		else ret = update_pte_range(pmd, addr, next, hvc);
	} while (pmd++, addr = next, ((addr < end) && !ret));
	return ret;
}

static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
				   unsigned long end,
				   struct host_vm_change *hvc)
{
	pud_t *pud;
	unsigned long next;
	int ret = 0;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (!pud_present(*pud)) {
			if (hvc->force || pud_newpage(*pud)) {
				ret = add_munmap(addr, next - addr, hvc);
				pud_mkuptodate(*pud);
			}
		}
		else ret = update_pmd_range(pud, addr, next, hvc);
	} while (pud++, addr = next, ((addr < end) && !ret));
	return ret;
}

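/*
 * Bring the host mappings for [start_addr, end_addr) in sync with mm's
 * page tables.  If any host operation fails, the host address space no
 * longer matches the page tables, so the process is killed instead of
 * being left running on stale mappings.
 */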
void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	pgd_t *pgd;
	struct host_vm_change hvc;
	unsigned long addr = start_addr, next;
	int ret = 0;

	hvc = INIT_HVC(mm, force);
	pgd = pgd_offset(mm, addr);
	do {
		next = pgd_addr_end(addr, end_addr);
		if (!pgd_present(*pgd)) {
			if (force || pgd_newpage(*pgd)) {
				ret = add_munmap(addr, next - addr, &hvc);
				pgd_mkuptodate(*pgd);
			}
		}
		else ret = update_pud_range(pgd, addr, next, &hvc);
	} while (pgd++, addr = next, ((addr < end_addr) && !ret));

	if (!ret)
		ret = do_ops(&hvc, hvc.index, 1);

	/* This is not an else because ret is modified above */
	if (ret) {
		printk(KERN_ERR "fix_range_common: failed, killing current "
		       "process: %d\n", task_tgid_vnr(current));
		/* We are under mmap_sem, release it such that current can terminate */
		up_write(&current->mm->mmap_sem);
		force_sig(SIGKILL, current);
		do_signal();
	}
}

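/*
 * The kernel-space counterpart of fix_range_common(): sync the host
 * mappings for a range of init_mm directly via os_unmap_memory() and
 * map_memory(), without batching.  Returns nonzero if anything changed.
 */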
static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
{
	struct mm_struct *mm;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	unsigned long addr, last;
	int updated = 0, err;

	mm = &init_mm;
	for (addr = start; addr < end;) {
		pgd = pgd_offset(mm, addr);
		if (!pgd_present(*pgd)) {
			last = ADD_ROUND(addr, PGDIR_SIZE);
			if (last > end)
				last = end;
			if (pgd_newpage(*pgd)) {
				updated = 1;
				err = os_unmap_memory((void *) addr,
						      last - addr);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pud = pud_offset(pgd, addr);
		if (!pud_present(*pud)) {
			last = ADD_ROUND(addr, PUD_SIZE);
			if (last > end)
				last = end;
			if (pud_newpage(*pud)) {
				updated = 1;
				err = os_unmap_memory((void *) addr,
						      last - addr);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pmd = pmd_offset(pud, addr);
		if (!pmd_present(*pmd)) {
			last = ADD_ROUND(addr, PMD_SIZE);
			if (last > end)
				last = end;
			if (pmd_newpage(*pmd)) {
				updated = 1;
				err = os_unmap_memory((void *) addr,
						      last - addr);
				if (err < 0)
					panic("munmap failed, errno = %d\n",
					      -err);
			}
			addr = last;
			continue;
		}

		pte = pte_offset_kernel(pmd, addr);
		if (!pte_present(*pte) || pte_newpage(*pte)) {
			updated = 1;
			err = os_unmap_memory((void *) addr,
					      PAGE_SIZE);
			if (err < 0)
				panic("munmap failed, errno = %d\n",
				      -err);
			if (pte_present(*pte))
				map_memory(addr,
					   pte_val(*pte) & PAGE_MASK,
					   PAGE_SIZE, 1, 1, 1);
		}
		else if (pte_newprot(*pte)) {
			updated = 1;
			os_protect_memory((void *) addr, PAGE_SIZE, 1, 1, 1);
		}
		addr += PAGE_SIZE;
	}
	return updated;
}

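/*
 * Sync the host mapping of a single page with its pte, applying the
 * same new-page/new-protection rules as update_pte_range() but issuing
 * the host operation immediately instead of queueing it.
 */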
void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	struct mm_struct *mm = vma->vm_mm;
	void *flush = NULL;
	int r, w, x, prot, err = 0;
	struct mm_id *mm_id;

	address &= PAGE_MASK;
	pgd = pgd_offset(mm, address);
	if (!pgd_present(*pgd))
		goto kill;

	pud = pud_offset(pgd, address);
	if (!pud_present(*pud))
		goto kill;

	pmd = pmd_offset(pud, address);
	if (!pmd_present(*pmd))
		goto kill;

	pte = pte_offset_kernel(pmd, address);

	r = pte_read(*pte);
	w = pte_write(*pte);
	x = pte_exec(*pte);
	if (!pte_young(*pte)) {
		r = 0;
		w = 0;
	} else if (!pte_dirty(*pte)) {
		w = 0;
	}

	mm_id = &mm->context.id;
	prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
		(x ? UM_PROT_EXEC : 0));
	if (pte_newpage(*pte)) {
		if (pte_present(*pte)) {
			unsigned long long offset;
			int fd;

			fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
			err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
				  1, &flush);
		}
		else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
	}
	else if (pte_newprot(*pte))
		err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);

	if (err)
		goto kill;

	*pte = pte_mkuptodate(*pte);

	return;

kill:
	printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
	force_sig(SIGKILL, current);
}

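/*
 * Trivial page table accessors for use outside this file; addr_pte()
 * walks all the way down to the pte mapping addr in a task's mm.
 */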
pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
{
	return pgd_offset(mm, address);
}

pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address)
{
	return pud_offset(pgd, address);
}

pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address)
{
	return pmd_offset(pud, address);
}

pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address)
{
	return pte_offset_kernel(pmd, address);
}

pte_t *addr_pte(struct task_struct *task, unsigned long addr)
{
	pgd_t *pgd = pgd_offset(task->mm, addr);
	pud_t *pud = pud_offset(pgd, addr);
	pmd_t *pmd = pmd_offset(pud, addr);

	return pte_offset_map(pmd, addr);
}

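/*
 * The remaining entry points implement the generic TLB flush API in
 * terms of the helpers above.
 */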
void flush_tlb_all(void)
{
	flush_tlb_mm(current->mm);
}

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	flush_tlb_kernel_range_common(start, end);
}

void flush_tlb_kernel_vm(void)
{
	flush_tlb_kernel_range_common(start_vm, end_vm);
}

void __flush_tlb_one(unsigned long addr)
{
	flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
}

static void fix_range(struct mm_struct *mm, unsigned long start_addr,
		      unsigned long end_addr, int force)
{
	fix_range_common(mm, start_addr, end_addr, force);
}

void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
		     unsigned long end)
{
	if (vma->vm_mm == NULL)
		flush_tlb_kernel_range_common(start, end);
	else fix_range(vma->vm_mm, start, end, 0);
}
EXPORT_SYMBOL(flush_tlb_range);

void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
			unsigned long end)
{
	/*
	 * Don't bother flushing if this address space is about to be
	 * destroyed.
	 */
	if (atomic_read(&mm->mm_users) == 0)
		return;

	fix_range(mm, start, end, 0);
}

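/*
 * Sync the host mappings of every VMA in mm.
 */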
void flush_tlb_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma = mm->mmap;

	while (vma != NULL) {
		fix_range(mm, vma->vm_start, vma->vm_end, 0);
		vma = vma->vm_next;
	}
}

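/*
 * Resync current's entire host address space, remapping every page
 * regardless of its new-page/new-protection bits.
 */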
void force_flush_all(void)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma = mm->mmap;

	while (vma != NULL) {
		fix_range(mm, vma->vm_start, vma->vm_end, 1);
		vma = vma->vm_next;
	}
}