/*
 * Based on arch/arm/mm/mmu.c
 *
 * Copyright (C) 1995-2005 Russell King
 * Copyright (C) 2012 ARM Ltd.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/mman.h>
#include <linux/nodemask.h>
#include <linux/memblock.h>
#include <linux/fs.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/stop_machine.h>

#include <asm/cputype.h>
#include <asm/fixmap.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sizes.h>
#include <asm/tlb.h>
#include <asm/memblock.h>
#include <asm/mmu_context.h>

#include "mm.h"

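/*
 * Default TCR_EL1.T0SZ for the identity mapping, covering VA_BITS of
 * address space. Note (summary, not a full description of the head.S
 * logic): early boot code may lower this value, widening the idmap, if the
 * kernel image lies at a physical address that VA_BITS cannot reach.
 */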
u64 idmap_t0sz = TCR_T0SZ(VA_BITS);

/*
 * Empty_zero_page is a special page that is used for zero-initialized data
 * and COW.
 */
struct page *empty_zero_page;
EXPORT_SYMBOL(empty_zero_page);

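/*
 * Choose the memory attributes for user mappings of physical memory (e.g.
 * via /dev/mem): addresses without a valid struct page are mapped as
 * uncached/device memory, O_SYNC requests get a write-combining mapping,
 * and everything else keeps the attributes passed in by the caller.
 */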
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
			      unsigned long size, pgprot_t vma_prot)
{
	if (!pfn_valid(pfn))
		return pgprot_noncached(vma_prot);
	else if (file->f_flags & O_SYNC)
		return pgprot_writecombine(vma_prot);
	return vma_prot;
}
EXPORT_SYMBOL(phys_mem_access_prot);

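/*
 * Allocate a zeroed, naturally aligned block from memblock, for page
 * tables created before the core memory allocators are available.
 */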
static void __init *early_alloc(unsigned long sz)
{
	void *ptr = __va(memblock_alloc(sz, sz));
	BUG_ON(!ptr);
	memset(ptr, 0, sz);
	return ptr;
}

/*
 * Remap a PMD into pages.
 */
static void split_pmd(pmd_t *pmd, pte_t *pte)
{
	unsigned long pfn = pmd_pfn(*pmd);
	int i = 0;

	do {
		/*
		 * Need to have the least restrictive permissions available;
		 * permissions will be fixed up later.
		 */
		set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
		pfn++;
	} while (pte++, i++, i < PTRS_PER_PTE);
}

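/*
 * Populate the pte level for [addr, end): allocate a pte table if the pmd
 * is empty (splitting an existing section mapping if necessary), then
 * write ptes covering the pfn range with the requested protection.
 */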
static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
				  unsigned long end, unsigned long pfn,
				  pgprot_t prot,
				  void *(*alloc)(unsigned long size))
{
	pte_t *pte;

	if (pmd_none(*pmd) || pmd_sect(*pmd)) {
		pte = alloc(PTRS_PER_PTE * sizeof(pte_t));
		if (pmd_sect(*pmd))
			split_pmd(pmd, pte);
		__pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE);
		flush_tlb_all();
	}
	BUG_ON(pmd_bad(*pmd));

	pte = pte_offset_kernel(pmd, addr);
	do {
		set_pte(pte, pfn_pte(pfn, prot));
		pfn++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

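/*
 * Remap a PUD-level section into PMD-level sections, preserving the
 * original output address and memory attributes of the block mapping.
 */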
static void split_pud(pud_t *old_pud, pmd_t *pmd)
{
	unsigned long addr = pud_pfn(*old_pud) << PAGE_SHIFT;
	pgprot_t prot = __pgprot(pud_val(*old_pud) ^ addr);
	int i = 0;

	do {
		set_pmd(pmd, __pmd(addr | prot));
		addr += PMD_SIZE;
	} while (pmd++, i++, i < PTRS_PER_PMD);
}

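/*
 * Populate the pmd level for [addr, end), using section mappings where the
 * virtual address, physical address and size are all section aligned, and
 * falling back to alloc_init_pte() otherwise.
 */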
static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
				  unsigned long addr, unsigned long end,
				  phys_addr_t phys, pgprot_t prot,
				  void *(*alloc)(unsigned long size))
{
	pmd_t *pmd;
	unsigned long next;

	/*
	 * Check for initial section mappings in the pgd/pud and remove them.
	 */
	if (pud_none(*pud) || pud_sect(*pud)) {
		pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t));
		if (pud_sect(*pud)) {
			/*
			 * Need to keep the existing 1GB of mappings
			 * present while the new pmd table is populated.
			 */
			split_pud(pud, pmd);
		}
		pud_populate(mm, pud, pmd);
		flush_tlb_all();
	}
	BUG_ON(pud_bad(*pud));

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		/* try section mapping first */
		if (((addr | next | phys) & ~SECTION_MASK) == 0) {
			pmd_t old_pmd = *pmd;
			set_pmd(pmd, __pmd(phys |
					   pgprot_val(mk_sect_prot(prot))));
			/*
			 * Check for previous table entries created during
			 * boot (__create_page_tables) and flush them.
			 */
			if (!pmd_none(old_pmd)) {
				flush_tlb_all();
				if (pmd_table(old_pmd)) {
					phys_addr_t table = __pa(pte_offset_map(&old_pmd, 0));
					if (!WARN_ON_ONCE(slab_is_available()))
						memblock_free(table, PAGE_SIZE);
				}
			}
		} else {
			alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
				       prot, alloc);
		}
		phys += next - addr;
	} while (pmd++, addr = next, addr != end);
}

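/*
 * 1GB block mappings are only used with the 4K granule, and only when the
 * virtual address, physical address and remaining size are all 1GB
 * aligned.
 */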
static inline bool use_1G_block(unsigned long addr, unsigned long next,
			unsigned long phys)
{
	if (PAGE_SHIFT != 12)
		return false;

	if (((addr | next | phys) & ~PUD_MASK) != 0)
		return false;

	return true;
}

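/*
 * Populate the pud level for [addr, end), using 1GB block mappings where
 * use_1G_block() allows it and falling back to alloc_init_pmd() otherwise.
 */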
static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
				  unsigned long addr, unsigned long end,
				  phys_addr_t phys, pgprot_t prot,
				  void *(*alloc)(unsigned long size))
{
	pud_t *pud;
	unsigned long next;

	if (pgd_none(*pgd)) {
		pud = alloc(PTRS_PER_PUD * sizeof(pud_t));
		pgd_populate(mm, pgd, pud);
	}
	BUG_ON(pgd_bad(*pgd));

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);

		/*
		 * For 4K granule only, attempt to put down a 1GB block
		 */
		if (use_1G_block(addr, next, phys)) {
			pud_t old_pud = *pud;
			set_pud(pud, __pud(phys |
					   pgprot_val(mk_sect_prot(prot))));

			/*
			 * If we have an old value for a pud, it will
			 * be pointing to a pmd table that we no longer
			 * need (from swapper_pg_dir).
			 *
			 * Look up the old pmd table and free it.
			 */
			if (!pud_none(old_pud)) {
				flush_tlb_all();
				if (pud_table(old_pud)) {
					phys_addr_t table = __pa(pmd_offset(&old_pud, 0));
					if (!WARN_ON_ONCE(slab_is_available()))
						memblock_free(table, PAGE_SIZE);
				}
			}
		} else {
			alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc);
		}
		phys += next - addr;
	} while (pud++, addr = next, addr != end);
}

/*
 * Create the page directory entries and any necessary page tables for the
 * mapping of [virt, virt + size) to 'phys' with protection 'prot'.
 */
static void __create_mapping(struct mm_struct *mm, pgd_t *pgd,
				    phys_addr_t phys, unsigned long virt,
				    phys_addr_t size, pgprot_t prot,
				    void *(*alloc)(unsigned long size))
{
	unsigned long addr, length, end, next;

	addr = virt & PAGE_MASK;
	length = PAGE_ALIGN(size + (virt & ~PAGE_MASK));

	end = addr + length;
	do {
		next = pgd_addr_end(addr, end);
		alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc);
		phys += next - addr;
	} while (pgd++, addr = next, addr != end);
}

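/*
 * Allocate a zeroed page-table page from the page allocator (PGALLOC_GFP
 * requests zeroed memory), for mappings created after the core allocators
 * are up.
 */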
static void *late_alloc(unsigned long size)
{
	void *ptr;

	BUG_ON(size > PAGE_SIZE);
	ptr = (void *)__get_free_page(PGALLOC_GFP);
	BUG_ON(!ptr);
	return ptr;
}

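/*
 * Create a mapping in the kernel page tables (init_mm) using memblock for
 * any page-table allocations; only suitable for use during early boot, and
 * only for addresses within the kernel VA range.
 */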
static void __ref create_mapping(phys_addr_t phys, unsigned long virt,
				  phys_addr_t size, pgprot_t prot)
{
	if (virt < VMALLOC_START) {
		pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
			&phys, virt);
		return;
	}
	__create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt,
			 size, prot, early_alloc);
}

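/*
 * Create a mapping in a caller-supplied mm (for example a separate set of
 * page tables such as those used for UEFI runtime services), starting from
 * that mm's pgd. Page-table pages come from the normal page allocator, so
 * this must not be called before it is available. A purely illustrative
 * call (placeholder names) might look like:
 *
 *	create_pgd_mapping(&some_mm, paddr, vaddr, size, PAGE_KERNEL);
 */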
void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
			       unsigned long virt, phys_addr_t size,
			       pgprot_t prot)
{
	__create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot,
				late_alloc);
}

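/*
 * As create_mapping(), but for use after paging_init(): page-table pages
 * come from the page allocator rather than from memblock.
 */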
static void create_mapping_late(phys_addr_t phys, unsigned long virt,
				  phys_addr_t size, pgprot_t prot)
{
	if (virt < VMALLOC_START) {
		pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
			&phys, virt);
		return;
	}

	return __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK),
				phys, virt, size, prot, late_alloc);
}

#ifdef CONFIG_DEBUG_RODATA
static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
{
	/*
	 * Set up the executable regions using the existing section mappings
	 * for now. This will get more fine-grained later once all memory is
	 * mapped.
	 */
	unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
	unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);

	if (end < kernel_x_start) {
		create_mapping(start, __phys_to_virt(start),
			end - start, PAGE_KERNEL);
	} else if (start >= kernel_x_end) {
		create_mapping(start, __phys_to_virt(start),
			end - start, PAGE_KERNEL);
	} else {
		if (start < kernel_x_start)
			create_mapping(start, __phys_to_virt(start),
				kernel_x_start - start,
				PAGE_KERNEL);
		create_mapping(kernel_x_start,
				__phys_to_virt(kernel_x_start),
				kernel_x_end - kernel_x_start,
				PAGE_KERNEL_EXEC);
		if (kernel_x_end < end)
			create_mapping(kernel_x_end,
				__phys_to_virt(kernel_x_end),
				end - kernel_x_end,
				PAGE_KERNEL);
	}
}
#else
static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
{
	create_mapping(start, __phys_to_virt(start), end - start,
			PAGE_KERNEL_EXEC);
}
#endif

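/*
 * Map all memory regions reported by memblock into the kernel's linear
 * mapping.
 */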
static void __init map_mem(void)
{
	struct memblock_region *reg;
	phys_addr_t limit;

	/*
	 * Temporarily limit the memblock range. We need to do this as
	 * create_mapping requires puds, pmds and ptes to be allocated from
	 * memory addressable from the initial direct kernel mapping.
	 *
	 * The initial direct kernel mapping, located at swapper_pg_dir, gives
	 * us PUD_SIZE (4K pages) or PMD_SIZE (64K pages) memory starting from
	 * PHYS_OFFSET (which must be aligned to 2MB as per
	 * Documentation/arm64/booting.txt).
	 */
	if (IS_ENABLED(CONFIG_ARM64_64K_PAGES))
		limit = PHYS_OFFSET + PMD_SIZE;
	else
		limit = PHYS_OFFSET + PUD_SIZE;
	memblock_set_current_limit(limit);

	/* map all the memory banks */
	for_each_memblock(memory, reg) {
		phys_addr_t start = reg->base;
		phys_addr_t end = start + reg->size;

		if (start >= end)
			break;

#ifndef CONFIG_ARM64_64K_PAGES
		/*
		 * For the first memory bank align the start address and
		 * current memblock limit to prevent create_mapping() from
		 * allocating pte page tables from unmapped memory.
		 * When 64K pages are enabled, the pte page table for the
		 * first PGDIR_SIZE is already present in swapper_pg_dir.
		 */
		if (start < limit)
			start = ALIGN(start, PMD_SIZE);
		if (end < limit) {
			limit = end & PMD_MASK;
			memblock_set_current_limit(limit);
		}
#endif
		__map_memblock(start, end);
	}

	/* Limit no longer required. */
	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
}

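/*
 * With CONFIG_DEBUG_RODATA, map_mem() maps the whole section-aligned
 * region around the kernel text as executable. Now that everything is
 * mapped, remap the unaligned head and tail of that region back to
 * non-executable PAGE_KERNEL.
 */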
void __init fixup_executable(void)
{
#ifdef CONFIG_DEBUG_RODATA
	/*
	 * Now that we are actually fully mapped, make the start/end more
	 * fine-grained.
	 */
	if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE)) {
		unsigned long aligned_start = round_down(__pa(_stext),
							SECTION_SIZE);

		create_mapping(aligned_start, __phys_to_virt(aligned_start),
				__pa(_stext) - aligned_start,
				PAGE_KERNEL);
	}

	if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE)) {
		unsigned long aligned_end = round_up(__pa(__init_end),
							SECTION_SIZE);
		create_mapping(__pa(__init_end), (unsigned long)__init_end,
				aligned_end - __pa(__init_end),
				PAGE_KERNEL);
	}
#endif
}

#ifdef CONFIG_DEBUG_RODATA
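/*
 * Make the kernel text (and the read-only data contained within it)
 * read-only; called from the generic init code once boot is far enough
 * along.
 */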
void mark_rodata_ro(void)
{
	create_mapping_late(__pa(_stext), (unsigned long)_stext,
				(unsigned long)_etext - (unsigned long)_stext,
				PAGE_KERNEL_EXEC | PTE_RDONLY);
}
#endif

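/*
 * Called when the init sections are freed: remap them with PAGE_KERNEL so
 * that the freed memory is no longer executable.
 */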
void fixup_init(void)
{
	create_mapping_late(__pa(__init_begin), (unsigned long)__init_begin,
			(unsigned long)__init_end - (unsigned long)__init_begin,
			PAGE_KERNEL);
}

/*
 * paging_init() sets up the page tables, initialises the zone memory
 * maps and sets up the zero page.
 */
void __init paging_init(void)
{
	void *zero_page;

	map_mem();
	fixup_executable();

	/* allocate the zero page. */
	zero_page = early_alloc(PAGE_SIZE);

	bootmem_init();

	empty_zero_page = virt_to_page(zero_page);

	/* Ensure the zero page is visible to the page table walker */
	dsb(ishst);

	/*
	 * TTBR0 is only used for the identity mapping at this stage. Make it
	 * point to zero page to avoid speculatively fetching new entries.
	 */
	cpu_set_reserved_ttbr0();
	flush_tlb_all();
	cpu_set_default_tcr_t0sz();
}

/*
 * Enable the identity mapping to allow the MMU disabling.
 */
void setup_mm_for_reboot(void)
{
	cpu_set_reserved_ttbr0();
	flush_tlb_all();
	cpu_set_idmap_tcr_t0sz();
	cpu_switch_mm(idmap_pg_dir, &init_mm);
}

/*
 * Check whether a kernel address is valid (derived from arch/x86/).
 */
int kern_addr_valid(unsigned long addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	if ((((long)addr) >> VA_BITS) != -1UL)
		return 0;

	pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd))
		return 0;

	pud = pud_offset(pgd, addr);
	if (pud_none(*pud))
		return 0;

	if (pud_sect(*pud))
		return pfn_valid(pud_pfn(*pud));

	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return 0;

	if (pmd_sect(*pmd))
		return pfn_valid(pmd_pfn(*pmd));

	pte = pte_offset_kernel(pmd, addr);
	if (pte_none(*pte))
		return 0;

	return pfn_valid(pte_pfn(*pte));
}

#ifdef CONFIG_SPARSEMEM_VMEMMAP
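/*
 * With 64K pages the struct page array (vmemmap) is populated with base
 * pages; with 4K pages, PMD-sized section mappings are used so that the
 * vmemmap does not need pte-level tables.
 */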
#ifdef CONFIG_ARM64_64K_PAGES
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
	return vmemmap_populate_basepages(start, end, node);
}
#else	/* !CONFIG_ARM64_64K_PAGES */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
	unsigned long addr = start;
	unsigned long next;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	do {
		next = pmd_addr_end(addr, end);

		pgd = vmemmap_pgd_populate(addr, node);
		if (!pgd)
			return -ENOMEM;

		pud = vmemmap_pud_populate(pgd, addr, node);
		if (!pud)
			return -ENOMEM;

		pmd = pmd_offset(pud, addr);
		if (pmd_none(*pmd)) {
			void *p = NULL;

			p = vmemmap_alloc_block_buf(PMD_SIZE, node);
			if (!p)
				return -ENOMEM;

			set_pmd(pmd, __pmd(__pa(p) | PROT_SECT_NORMAL));
		} else {
			vmemmap_verify((pte_t *)pmd, node, addr, next);
		}
	} while (addr = next, addr != end);

	return 0;
}
#endif	/* CONFIG_ARM64_64K_PAGES */

void vmemmap_free(unsigned long start, unsigned long end)
{
}
#endif	/* CONFIG_SPARSEMEM_VMEMMAP */

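/*
 * Statically allocated page tables backing the fixmap region; levels that
 * are folded away for the configured page size are compiled out.
 */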
static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
#if CONFIG_PGTABLE_LEVELS > 2
static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
#endif
#if CONFIG_PGTABLE_LEVELS > 3
static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
#endif

static inline pud_t *fixmap_pud(unsigned long addr)
{
	pgd_t *pgd = pgd_offset_k(addr);

	BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));

	return pud_offset(pgd, addr);
}

static inline pmd_t *fixmap_pmd(unsigned long addr)
{
	pud_t *pud = fixmap_pud(addr);

	BUG_ON(pud_none(*pud) || pud_bad(*pud));

	return pmd_offset(pud, addr);
}

static inline pte_t *fixmap_pte(unsigned long addr)
{
	pmd_t *pmd = fixmap_pmd(addr);

	BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));

	return pte_offset_kernel(pmd, addr);
}

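/*
 * Wire the statically allocated bm_* tables into init_mm so that the
 * fixmap (and with it early ioremap and the early console) can be used
 * before paging_init() runs.
 */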
void __init early_fixmap_init(void)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	unsigned long addr = FIXADDR_START;

	pgd = pgd_offset_k(addr);
	pgd_populate(&init_mm, pgd, bm_pud);
	pud = pud_offset(pgd, addr);
	pud_populate(&init_mm, pud, bm_pmd);
	pmd = pmd_offset(pud, addr);
	pmd_populate_kernel(&init_mm, pmd, bm_pte);

	/*
	 * The boot-ioremap range spans multiple pmds, for which
	 * we are not prepared:
	 */
	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));

	if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
	     || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
		WARN_ON(1);
		pr_warn("pmd %p != %p, %p\n",
			pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
			fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
		pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
			fix_to_virt(FIX_BTMAP_BEGIN));
		pr_warn("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
			fix_to_virt(FIX_BTMAP_END));

		pr_warn("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
		pr_warn("FIX_BTMAP_BEGIN:     %d\n", FIX_BTMAP_BEGIN);
	}
}

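/*
 * Install or clear a single fixmap entry: a non-zero pgprot installs the
 * mapping, a zero pgprot clears it and flushes the TLB for that slot.
 */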
void __set_fixmap(enum fixed_addresses idx,
			       phys_addr_t phys, pgprot_t flags)
{
	unsigned long addr = __fix_to_virt(idx);
	pte_t *pte;

	BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);

	pte = fixmap_pte(addr);

	if (pgprot_val(flags)) {
		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
	} else {
		pte_clear(&init_mm, addr, pte);
		flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
	}
}