/*
 *  linux/arch/arm/mm/mmu.c
 *
 *  Copyright (C) 1995-2005 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/mman.h>
#include <linux/nodemask.h>
#include <linux/memblock.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
#include <linux/sizes.h>

#include <asm/cp15.h>
#include <asm/cputype.h>
#include <asm/sections.h>
#include <asm/cachetype.h>
#include <asm/fixmap.h>
#include <asm/setup.h>
#include <asm/smp_plat.h>
#include <asm/tlb.h>
#include <asm/highmem.h>
#include <asm/system_info.h>
#include <asm/traps.h>
#include <asm/procinfo.h>
#include <asm/memory.h>

#include <asm/mach/arch.h>
#include <asm/mach/map.h>
#include <asm/mach/pci.h>

#include "mm.h"
#include "tcm.h"

/*
 * empty_zero_page is a special page that is used for
 * zero-initialized data and COW.
 */
struct page *empty_zero_page;
EXPORT_SYMBOL(empty_zero_page);

/*
 * The pmd table for the upper-most set of pages.
 */
pmd_t *top_pmd;

pmdval_t user_pmd_table = _PAGE_USER_TABLE;

#define CPOLICY_UNCACHED	0
#define CPOLICY_BUFFERED	1
#define CPOLICY_WRITETHROUGH	2
#define CPOLICY_WRITEBACK	3
#define CPOLICY_WRITEALLOC	4

static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
static unsigned int ecc_mask __initdata = 0;
pgprot_t pgprot_user;
pgprot_t pgprot_kernel;
pgprot_t pgprot_hyp_device;
pgprot_t pgprot_s2;
pgprot_t pgprot_s2_device;

EXPORT_SYMBOL(pgprot_user);
EXPORT_SYMBOL(pgprot_kernel);

struct cachepolicy {
	const char	policy[16];
	unsigned int	cr_mask;
	pmdval_t	pmd;
	pteval_t	pte;
	pteval_t	pte_s2;
};

#ifdef CONFIG_ARM_LPAE
#define s2_policy(policy)	policy
#else
#define s2_policy(policy)	0
#endif

static struct cachepolicy cache_policies[] __initdata = {
	{
		.policy		= "uncached",
		.cr_mask	= CR_W|CR_C,
		.pmd		= PMD_SECT_UNCACHED,
		.pte		= L_PTE_MT_UNCACHED,
		.pte_s2		= s2_policy(L_PTE_S2_MT_UNCACHED),
	}, {
		.policy		= "buffered",
		.cr_mask	= CR_C,
		.pmd		= PMD_SECT_BUFFERED,
		.pte		= L_PTE_MT_BUFFERABLE,
		.pte_s2		= s2_policy(L_PTE_S2_MT_UNCACHED),
	}, {
		.policy		= "writethrough",
		.cr_mask	= 0,
		.pmd		= PMD_SECT_WT,
		.pte		= L_PTE_MT_WRITETHROUGH,
		.pte_s2		= s2_policy(L_PTE_S2_MT_WRITETHROUGH),
	}, {
		.policy		= "writeback",
		.cr_mask	= 0,
		.pmd		= PMD_SECT_WB,
		.pte		= L_PTE_MT_WRITEBACK,
		.pte_s2		= s2_policy(L_PTE_S2_MT_WRITEBACK),
	}, {
		.policy		= "writealloc",
		.cr_mask	= 0,
		.pmd		= PMD_SECT_WBWA,
		.pte		= L_PTE_MT_WRITEALLOC,
		.pte_s2		= s2_policy(L_PTE_S2_MT_WRITEBACK),
	}
};

#ifdef CONFIG_CPU_CP15
static unsigned long initial_pmd_value __initdata = 0;

/*
 * Initialise the cachepolicy variable with the initial state specified
 * via the "pmd" value.  This is used to ensure that on ARMv6 and later,
 * the C code sets the page tables up with the same policy as the head
 * assembly code, which avoids an illegal state where the TLBs can get
 * confused.  See comments in early_cachepolicy() for more information.
 */
void __init init_default_cache_policy(unsigned long pmd)
{
	int i;

	initial_pmd_value = pmd;

	pmd &= PMD_SECT_TEX(1) | PMD_SECT_BUFFERABLE | PMD_SECT_CACHEABLE;

	for (i = 0; i < ARRAY_SIZE(cache_policies); i++)
		if (cache_policies[i].pmd == pmd) {
			cachepolicy = i;
			break;
		}

	if (i == ARRAY_SIZE(cache_policies))
		pr_err("ERROR: could not find cache policy\n");
}

/*
 * These are useful for identifying cache coherency problems by allowing
 * the cache or the cache and writebuffer to be turned off.  (Note: the
 * write buffer should not be on and the cache off).
 */
static int __init early_cachepolicy(char *p)
{
	int i, selected = -1;

	for (i = 0; i < ARRAY_SIZE(cache_policies); i++) {
		int len = strlen(cache_policies[i].policy);

		if (memcmp(p, cache_policies[i].policy, len) == 0) {
			selected = i;
			break;
		}
	}

	if (selected == -1)
		pr_err("ERROR: unknown or unsupported cache policy\n");

	/*
	 * This restriction is partly to do with the way we boot; it is
	 * unpredictable to have memory mapped using two different sets of
	 * memory attributes (shared, type, and cache attribs).  We cannot
	 * change these attributes once the initial assembly has set up the
	 * page tables.
	 */
	if (cpu_architecture() >= CPU_ARCH_ARMv6 && selected != cachepolicy) {
		pr_warn("Only cachepolicy=%s supported on ARMv6 and later\n",
			cache_policies[cachepolicy].policy);
		return 0;
	}

	if (selected != cachepolicy) {
		unsigned long cr = __clear_cr(cache_policies[selected].cr_mask);
		cachepolicy = selected;
		flush_cache_all();
		set_cr(cr);
	}
	return 0;
}
early_param("cachepolicy", early_cachepolicy);
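
/*
 * Example (illustrative only): booting with "cachepolicy=writethrough" on
 * the kernel command line selects the write-through policy from the table
 * above, subject to the ARMv6+ restriction described in early_cachepolicy().
 */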

static int __init early_nocache(char *__unused)
{
	char *p = "buffered";
	pr_warn("nocache is deprecated; use cachepolicy=%s\n", p);
	early_cachepolicy(p);
	return 0;
}
early_param("nocache", early_nocache);

static int __init early_nowrite(char *__unused)
{
	char *p = "uncached";
	pr_warn("nowb is deprecated; use cachepolicy=%s\n", p);
	early_cachepolicy(p);
	return 0;
}
early_param("nowb", early_nowrite);

#ifndef CONFIG_ARM_LPAE
static int __init early_ecc(char *p)
{
	if (memcmp(p, "on", 2) == 0)
		ecc_mask = PMD_PROTECTION;
	else if (memcmp(p, "off", 3) == 0)
		ecc_mask = 0;
	return 0;
}
early_param("ecc", early_ecc);
#endif

#else /* ifdef CONFIG_CPU_CP15 */

static int __init early_cachepolicy(char *p)
{
	pr_warn("cachepolicy kernel parameter not supported without cp15\n");
	return 0;
}
early_param("cachepolicy", early_cachepolicy);

static int __init noalign_setup(char *__unused)
{
	pr_warn("noalign kernel parameter not supported without cp15\n");
	return 1;
}
__setup("noalign", noalign_setup);

#endif /* ifdef CONFIG_CPU_CP15 / else */

#define PROT_PTE_DEVICE		L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN
#define PROT_PTE_S2_DEVICE	PROT_PTE_DEVICE
#define PROT_SECT_DEVICE	PMD_TYPE_SECT|PMD_SECT_AP_WRITE

static struct mem_type mem_types[] = {
	[MT_DEVICE] = {		  /* Strongly ordered / ARMv6 shared device */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
				  L_PTE_SHARED,
		.prot_pte_s2	= s2_policy(PROT_PTE_S2_DEVICE) |
				  s2_policy(L_PTE_S2_MT_DEV_SHARED) |
				  L_PTE_SHARED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_S,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_CACHED] = {	  /* ioremap_cached */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_WB,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_WC] = {	/* ioremap_wc */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE,
		.domain		= DOMAIN_IO,
	},
	[MT_UNCACHED] = {
		.prot_pte	= PROT_PTE_DEVICE,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_XN,
		.domain		= DOMAIN_IO,
	},
	[MT_CACHECLEAN] = {
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
		.domain    = DOMAIN_KERNEL,
	},
#ifndef CONFIG_ARM_LPAE
	[MT_MINICLEAN] = {
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
		.domain    = DOMAIN_KERNEL,
	},
#endif
	[MT_LOW_VECTORS] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_RDONLY,
		.prot_l1   = PMD_TYPE_TABLE,
		.domain    = DOMAIN_USER,
	},
	[MT_HIGH_VECTORS] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_USER | L_PTE_RDONLY,
		.prot_l1   = PMD_TYPE_TABLE,
		.domain    = DOMAIN_USER,
	},
	[MT_MEMORY_RWX] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
		.prot_l1   = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_MEMORY_RW] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
			     L_PTE_XN,
		.prot_l1   = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_ROM] = {
		.prot_sect = PMD_TYPE_SECT,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_MEMORY_RWX_NONCACHED] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_MT_BUFFERABLE,
		.prot_l1   = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_MEMORY_RW_DTCM] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_XN,
		.prot_l1   = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_MEMORY_RWX_ITCM] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
		.prot_l1   = PMD_TYPE_TABLE,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_MEMORY_RW_SO] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_MT_UNCACHED | L_PTE_XN,
		.prot_l1   = PMD_TYPE_TABLE,
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_S |
				PMD_SECT_UNCACHED | PMD_SECT_XN,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_MEMORY_DMA_READY] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_XN,
		.prot_l1   = PMD_TYPE_TABLE,
		.domain    = DOMAIN_KERNEL,
	},
};

const struct mem_type *get_mem_type(unsigned int type)
{
	return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL;
}
EXPORT_SYMBOL(get_mem_type);
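
/*
 * Illustrative sketch (not used by this file; "pte" and "pfn" stand in for
 * a caller's own locals): code building its own entries can look up the
 * attributes for a memory type like this:
 *
 *	const struct mem_type *mt = get_mem_type(MT_DEVICE);
 *
 *	if (mt)
 *		pte = pfn_pte(pfn, __pgprot(mt->prot_pte));
 */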

/*
 * To avoid TLB flush broadcasts, this uses local_flush_tlb_kernel_range().
 * As a result, this can only be called with preemption disabled, as under
 * stop_machine().
 */
void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
{
	unsigned long vaddr = __fix_to_virt(idx);
	pte_t *pte = pte_offset_kernel(pmd_off_k(vaddr), vaddr);

	/* Make sure fixmap region does not exceed available allocation. */
	BUILD_BUG_ON(FIXADDR_START + (__end_of_fixed_addresses * PAGE_SIZE) >
		     FIXADDR_END);
	BUG_ON(idx >= __end_of_fixed_addresses);

	if (pgprot_val(prot))
		set_pte_at(NULL, vaddr, pte,
			pfn_pte(phys >> PAGE_SHIFT, prot));
	else
		pte_clear(NULL, vaddr, pte);
	local_flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE);
}
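
/*
 * Illustrative sketch of __set_fixmap() use (FIX_EXAMPLE is a made-up
 * fixmap index, not one defined in asm/fixmap.h): temporarily map a page,
 * access it through the fixmap virtual address, then tear the mapping
 * down again.  As noted above, this must run with preemption disabled.
 *
 *	__set_fixmap(FIX_EXAMPLE, page_to_phys(page), PAGE_KERNEL);
 *	memcpy((void *)__fix_to_virt(FIX_EXAMPLE), src, len);
 *	__set_fixmap(FIX_EXAMPLE, 0, __pgprot(0));
 */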

/*
 * Adjust the PMD section entries according to the CPU in use.
 */
static void __init build_mem_type_table(void)
{
	struct cachepolicy *cp;
	unsigned int cr = get_cr();
	pteval_t user_pgprot, kern_pgprot, vecs_pgprot;
	pteval_t hyp_device_pgprot, s2_pgprot, s2_device_pgprot;
	int cpu_arch = cpu_architecture();
	int i;

	if (cpu_arch < CPU_ARCH_ARMv6) {
#if defined(CONFIG_CPU_DCACHE_DISABLE)
		if (cachepolicy > CPOLICY_BUFFERED)
			cachepolicy = CPOLICY_BUFFERED;
#elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
		if (cachepolicy > CPOLICY_WRITETHROUGH)
			cachepolicy = CPOLICY_WRITETHROUGH;
#endif
	}
	if (cpu_arch < CPU_ARCH_ARMv5) {
		if (cachepolicy >= CPOLICY_WRITEALLOC)
			cachepolicy = CPOLICY_WRITEBACK;
		ecc_mask = 0;
	}

	if (is_smp()) {
		if (cachepolicy != CPOLICY_WRITEALLOC) {
			pr_warn("Forcing write-allocate cache policy for SMP\n");
			cachepolicy = CPOLICY_WRITEALLOC;
		}
		if (!(initial_pmd_value & PMD_SECT_S)) {
			pr_warn("Forcing shared mappings for SMP\n");
			initial_pmd_value |= PMD_SECT_S;
		}
	}

	/*
	 * Strip out features not present on earlier architectures.
	 * Pre-ARMv5 CPUs don't have TEX bits.  Pre-ARMv6 CPUs or those
	 * without extended page tables don't have the 'Shared' bit.
	 */
	if (cpu_arch < CPU_ARCH_ARMv5)
		for (i = 0; i < ARRAY_SIZE(mem_types); i++)
			mem_types[i].prot_sect &= ~PMD_SECT_TEX(7);
	if ((cpu_arch < CPU_ARCH_ARMv6 || !(cr & CR_XP)) && !cpu_is_xsc3())
		for (i = 0; i < ARRAY_SIZE(mem_types); i++)
			mem_types[i].prot_sect &= ~PMD_SECT_S;

	/*
	 * On ARMv5 and lower, bit 4 must be set for page tables (was: cache
	 * "update-able on write" bit on ARM610).  However, Xscale and
	 * Xscale3 require this bit to be cleared.
	 */
	if (cpu_is_xscale() || cpu_is_xsc3()) {
		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
			mem_types[i].prot_sect &= ~PMD_BIT4;
			mem_types[i].prot_l1 &= ~PMD_BIT4;
		}
	} else if (cpu_arch < CPU_ARCH_ARMv6) {
		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
			if (mem_types[i].prot_l1)
				mem_types[i].prot_l1 |= PMD_BIT4;
			if (mem_types[i].prot_sect)
				mem_types[i].prot_sect |= PMD_BIT4;
		}
	}

	/*
	 * Mark the device areas according to the CPU/architecture.
	 */
	if (cpu_is_xsc3() || (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP))) {
		if (!cpu_is_xsc3()) {
			/*
			 * Mark device regions on ARMv6+ as execute-never
			 * to prevent speculative instruction fetches.
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN;
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN;

			/* Also setup NX memory mapping */
			mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_XN;
		}
		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
			/*
			 * For ARMv7 with TEX remapping,
			 * - shared device is SXCB=1100
			 * - nonshared device is SXCB=0100
			 * - write combine device mem is SXCB=0001
			 * (Uncached Normal memory)
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1);
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
		} else if (cpu_is_xsc3()) {
			/*
			 * For Xscale3,
			 * - shared device is TEXCB=00101
			 * - nonshared device is TEXCB=01000
			 * - write combine device mem is TEXCB=00100
			 * (Inner/Outer Uncacheable in xsc3 parlance)
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
		} else {
			/*
			 * For ARMv6 and ARMv7 without TEX remapping,
			 * - shared device is TEXCB=00001
			 * - nonshared device is TEXCB=01000
			 * - write combine device mem is TEXCB=00100
			 * (Uncached Normal in ARMv6 parlance).
			 */
			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED;
			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
		}
	} else {
		/*
		 * On others, write combining is "Uncached/Buffered"
		 */
		mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
	}

	/*
	 * Now deal with the memory-type mappings
	 */
	cp = &cache_policies[cachepolicy];
	vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;
	s2_pgprot = cp->pte_s2;
	hyp_device_pgprot = mem_types[MT_DEVICE].prot_pte;
	s2_device_pgprot = mem_types[MT_DEVICE].prot_pte_s2;

#ifndef CONFIG_ARM_LPAE
	/*
	 * We don't use domains on ARMv6 (since this causes problems with
	 * v6/v7 kernels), so we must use a separate memory type for user
	 * r/o, kernel r/w to map the vectors page.
	 */
	if (cpu_arch == CPU_ARCH_ARMv6)
		vecs_pgprot |= L_PTE_MT_VECTORS;

	/*
	 * Check whether the CPU supports the PXN bit in the
	 * Short-descriptor translation table format descriptors.
	 */
	if (cpu_arch == CPU_ARCH_ARMv7 &&
		(read_cpuid_ext(CPUID_EXT_MMFR0) & 0xF) == 4) {
		user_pmd_table |= PMD_PXNTABLE;
	}
#endif

	/*
	 * ARMv6 and above have extended page tables.
	 */
	if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
#ifndef CONFIG_ARM_LPAE
		/*
		 * Mark cache clean areas and XIP ROM read only
		 * from SVC mode and no access from userspace.
		 */
		mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
		mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
#endif

		/*
		 * If the initial page tables were created with the S bit
		 * set, then we need to do the same here for the same
		 * reasons given in early_cachepolicy().
		 */
		if (initial_pmd_value & PMD_SECT_S) {
			user_pgprot |= L_PTE_SHARED;
			kern_pgprot |= L_PTE_SHARED;
			vecs_pgprot |= L_PTE_SHARED;
			s2_pgprot |= L_PTE_SHARED;
			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S;
			mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED;
			mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S;
			mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED;
			mem_types[MT_MEMORY_RWX].prot_sect |= PMD_SECT_S;
			mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED;
			mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S;
			mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED;
			mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED;
			mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S;
			mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED;
		}
	}

	/*
	 * Non-cacheable Normal - intended for memory areas that must
	 * not cause dirty cache line writebacks when used
	 */
	if (cpu_arch >= CPU_ARCH_ARMv6) {
		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
			/* Non-cacheable Normal is XCB = 001 */
			mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |=
				PMD_SECT_BUFFERED;
		} else {
			/* For both ARMv6 and non-TEX-remapping ARMv7 */
			mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |=
				PMD_SECT_TEX(1);
		}
	} else {
		mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
	}

#ifdef CONFIG_ARM_LPAE
	/*
	 * Do not generate access flag faults for the kernel mappings.
	 */
	for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
		mem_types[i].prot_pte |= PTE_EXT_AF;
		if (mem_types[i].prot_sect)
			mem_types[i].prot_sect |= PMD_SECT_AF;
	}
	kern_pgprot |= PTE_EXT_AF;
	vecs_pgprot |= PTE_EXT_AF;

	/*
	 * Set PXN for user mappings
	 */
	user_pgprot |= PTE_EXT_PXN;
#endif

	for (i = 0; i < 16; i++) {
		pteval_t v = pgprot_val(protection_map[i]);
		protection_map[i] = __pgprot(v | user_pgprot);
	}

	mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot;
	mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot;

	pgprot_user   = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
	pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
				 L_PTE_DIRTY | kern_pgprot);
	pgprot_s2  = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | s2_pgprot);
	pgprot_s2_device  = __pgprot(s2_device_pgprot);
	pgprot_hyp_device  = __pgprot(hyp_device_pgprot);

	mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
	mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
	mem_types[MT_MEMORY_RWX].prot_sect |= ecc_mask | cp->pmd;
	mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot;
	mem_types[MT_MEMORY_RW].prot_sect |= ecc_mask | cp->pmd;
	mem_types[MT_MEMORY_RW].prot_pte |= kern_pgprot;
	mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot;
	mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask;
	mem_types[MT_ROM].prot_sect |= cp->pmd;

	switch (cp->pmd) {
	case PMD_SECT_WT:
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
		break;
	case PMD_SECT_WB:
	case PMD_SECT_WBWA:
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
		break;
	}
	pr_info("Memory policy: %sData cache %s\n",
		ecc_mask ? "ECC enabled, " : "", cp->policy);

	for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
		struct mem_type *t = &mem_types[i];
		if (t->prot_l1)
			t->prot_l1 |= PMD_DOMAIN(t->domain);
		if (t->prot_sect)
			t->prot_sect |= PMD_DOMAIN(t->domain);
	}
}

#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
			      unsigned long size, pgprot_t vma_prot)
{
	if (!pfn_valid(pfn))
		return pgprot_noncached(vma_prot);
	else if (file->f_flags & O_SYNC)
		return pgprot_writecombine(vma_prot);
	return vma_prot;
}
EXPORT_SYMBOL(phys_mem_access_prot);
#endif
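
/*
 * phys_mem_access_prot() is typically invoked from the /dev/mem mmap()
 * path; a simplified sketch of that call (modelled on drivers/char/mem.c)
 * looks like:
 *
 *	vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
 *						 vma->vm_end - vma->vm_start,
 *						 vma->vm_page_prot);
 */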

#define vectors_base()	(vectors_high() ? 0xffff0000 : 0)

static void __init *early_alloc_aligned(unsigned long sz, unsigned long align)
{
	void *ptr = __va(memblock_alloc(sz, align));
	memset(ptr, 0, sz);
	return ptr;
}

static void __init *early_alloc(unsigned long sz)
{
	return early_alloc_aligned(sz, sz);
}

static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr, unsigned long prot)
{
	if (pmd_none(*pmd)) {
		pte_t *pte = early_alloc(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE);
		__pmd_populate(pmd, __pa(pte), prot);
	}
	BUG_ON(pmd_bad(*pmd));
	return pte_offset_kernel(pmd, addr);
}

static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
				  unsigned long end, unsigned long pfn,
				  const struct mem_type *type)
{
	pte_t *pte = early_pte_alloc(pmd, addr, type->prot_l1);
	do {
		set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), 0);
		pfn++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

static void __init __map_init_section(pmd_t *pmd, unsigned long addr,
			unsigned long end, phys_addr_t phys,
			const struct mem_type *type)
{
	pmd_t *p = pmd;

#ifndef CONFIG_ARM_LPAE
	/*
	 * In classic MMU format, puds and pmds are folded into
	 * the pgds. pmd_offset gives the PGD entry. PGDs refer to a
	 * group of L1 entries making up one logical pointer to
	 * an L2 table (2MB), whereas PMDs refer to the individual
	 * L1 entries (1MB). Hence increment to get the correct
	 * offset for odd 1MB sections.
	 * (See arch/arm/include/asm/pgtable-2level.h)
	 */
	if (addr & SECTION_SIZE)
		pmd++;
#endif
	do {
		*pmd = __pmd(phys | type->prot_sect);
		phys += SECTION_SIZE;
	} while (pmd++, addr += SECTION_SIZE, addr != end);

	flush_pmd_entry(p);
}

static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
				      unsigned long end, phys_addr_t phys,
				      const struct mem_type *type)
{
	pmd_t *pmd = pmd_offset(pud, addr);
	unsigned long next;

	do {
		/*
		 * With LPAE, we must loop over to map
		 * all the pmds for the given range.
		 */
		next = pmd_addr_end(addr, end);

		/*
		 * Try a section mapping - addr, next and phys must all be
		 * aligned to a section boundary.
		 */
		if (type->prot_sect &&
				((addr | next | phys) & ~SECTION_MASK) == 0) {
			__map_init_section(pmd, addr, next, phys, type);
		} else {
			alloc_init_pte(pmd, addr, next,
						__phys_to_pfn(phys), type);
		}

		phys += next - addr;

	} while (pmd++, addr = next, addr != end);
}

static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
				  unsigned long end, phys_addr_t phys,
				  const struct mem_type *type)
{
	pud_t *pud = pud_offset(pgd, addr);
	unsigned long next;

	do {
		next = pud_addr_end(addr, end);
		alloc_init_pmd(pud, addr, next, phys, type);
		phys += next - addr;
	} while (pud++, addr = next, addr != end);
}

#ifndef CONFIG_ARM_LPAE
static void __init create_36bit_mapping(struct map_desc *md,
					const struct mem_type *type)
{
	unsigned long addr, length, end;
	phys_addr_t phys;
	pgd_t *pgd;

	addr = md->virtual;
	phys = __pfn_to_phys(md->pfn);
	length = PAGE_ALIGN(md->length);

	if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) {
		pr_err("MM: CPU does not support supersection mapping for 0x%08llx at 0x%08lx\n",
		       (long long)__pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	/* N.B.	ARMv6 supersections are only defined to work with domain 0.
	 *	Since domain assignments can in fact be arbitrary, the
	 *	'domain == 0' check below is required to ensure that ARMv6
	 *	supersections are only allocated for domain 0 regardless
	 *	of the actual domain assignments in use.
	 */
	if (type->domain) {
		pr_err("MM: invalid domain in supersection mapping for 0x%08llx at 0x%08lx\n",
		       (long long)__pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) {
		pr_err("MM: cannot create mapping for 0x%08llx at 0x%08lx invalid alignment\n",
		       (long long)__pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	/*
	 * Shift bits [35:32] of address into bits [23:20] of PMD
	 * (See ARMv6 spec).
	 */
	phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20);

	pgd = pgd_offset_k(addr);
	end = addr + length;
	do {
		pud_t *pud = pud_offset(pgd, addr);
		pmd_t *pmd = pmd_offset(pud, addr);
		int i;

		for (i = 0; i < 16; i++)
			*pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER);

		addr += SUPERSECTION_SIZE;
		phys += SUPERSECTION_SIZE;
		pgd += SUPERSECTION_SIZE >> PGDIR_SHIFT;
	} while (addr != end);
}
#endif	/* !CONFIG_ARM_LPAE */

/*
 * Create the page directory entries and any necessary
 * page tables for the mapping specified by `md'.  We
 * are able to cope here with varying sizes and address
 * offsets, and we take full advantage of sections and
 * supersections.
 */
static void __init create_mapping(struct map_desc *md)
{
	unsigned long addr, length, end;
	phys_addr_t phys;
	const struct mem_type *type;
	pgd_t *pgd;

	if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
		pr_warn("BUG: not creating mapping for 0x%08llx at 0x%08lx in user region\n",
			(long long)__pfn_to_phys((u64)md->pfn), md->virtual);
		return;
	}

	if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
	    md->virtual >= PAGE_OFFSET &&
	    (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) {
		pr_warn("BUG: mapping for 0x%08llx at 0x%08lx out of vmalloc space\n",
			(long long)__pfn_to_phys((u64)md->pfn), md->virtual);
	}

	type = &mem_types[md->type];

#ifndef CONFIG_ARM_LPAE
	/*
	 * Catch 36-bit addresses
	 */
	if (md->pfn >= 0x100000) {
		create_36bit_mapping(md, type);
		return;
	}
#endif

	addr = md->virtual & PAGE_MASK;
	phys = __pfn_to_phys(md->pfn);
	length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));

	if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
		pr_warn("BUG: map for 0x%08llx at 0x%08lx can not be mapped using pages, ignoring.\n",
			(long long)__pfn_to_phys(md->pfn), addr);
		return;
	}

	pgd = pgd_offset_k(addr);
	end = addr + length;
	do {
		unsigned long next = pgd_addr_end(addr, end);

		alloc_init_pud(pgd, addr, next, phys, type);

		phys += next - addr;
		addr = next;
	} while (pgd++, addr != end);
}

/*
 * Create the architecture specific mappings
 */
void __init iotable_init(struct map_desc *io_desc, int nr)
{
	struct map_desc *md;
	struct vm_struct *vm;
	struct static_vm *svm;

	if (!nr)
		return;

	svm = early_alloc_aligned(sizeof(*svm) * nr, __alignof__(*svm));

	for (md = io_desc; nr; md++, nr--) {
		create_mapping(md);

		vm = &svm->vm;
		vm->addr = (void *)(md->virtual & PAGE_MASK);
		vm->size = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));
		vm->phys_addr = __pfn_to_phys(md->pfn);
		vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING;
		vm->flags |= VM_ARM_MTYPE(md->type);
		vm->caller = iotable_init;
		add_static_vm_early(svm++);
	}
}
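
/*
 * Example (illustrative only; the names and addresses are made up): a
 * machine's ->map_io() callback typically describes its static device
 * mappings with a map_desc array and hands it to iotable_init():
 *
 *	static struct map_desc foo_io_desc[] __initdata = {
 *		{
 *			.virtual	= 0xf8000000,
 *			.pfn		= __phys_to_pfn(0x10000000),
 *			.length		= SZ_1M,
 *			.type		= MT_DEVICE,
 *		},
 *	};
 *
 *	static void __init foo_map_io(void)
 *	{
 *		iotable_init(foo_io_desc, ARRAY_SIZE(foo_io_desc));
 *	}
 */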

void __init vm_reserve_area_early(unsigned long addr, unsigned long size,
				  void *caller)
{
	struct vm_struct *vm;
	struct static_vm *svm;

	svm = early_alloc_aligned(sizeof(*svm), __alignof__(*svm));

	vm = &svm->vm;
	vm->addr = (void *)addr;
	vm->size = size;
	vm->flags = VM_IOREMAP | VM_ARM_EMPTY_MAPPING;
	vm->caller = caller;
	add_static_vm_early(svm);
}

#ifndef CONFIG_ARM_LPAE

/*
 * The Linux PMD is made of two consecutive section entries covering 2MB
 * (see definition in include/asm/pgtable-2level.h).  However a call to
 * create_mapping() may optimize static mappings by using individual
 * 1MB section mappings.  This leaves the actual PMD potentially half
 * initialized if the top or bottom section entry isn't used, leaving it
 * open to problems if a subsequent ioremap() or vmalloc() tries to use
 * the virtual space left free by that unused section entry.
 *
 * Let's avoid the issue by inserting dummy vm entries covering the unused
 * PMD halves once the static mappings are in place.
 */

static void __init pmd_empty_section_gap(unsigned long addr)
{
	vm_reserve_area_early(addr, SECTION_SIZE, pmd_empty_section_gap);
}

static void __init fill_pmd_gaps(void)
{
	struct static_vm *svm;
	struct vm_struct *vm;
	unsigned long addr, next = 0;
	pmd_t *pmd;

	list_for_each_entry(svm, &static_vmlist, list) {
		vm = &svm->vm;
		addr = (unsigned long)vm->addr;
		if (addr < next)
			continue;

		/*
		 * Check if this vm starts on an odd section boundary.
		 * If so and the first section entry for this PMD is free
		 * then we block the corresponding virtual address.
		 */
		if ((addr & ~PMD_MASK) == SECTION_SIZE) {
			pmd = pmd_off_k(addr);
			if (pmd_none(*pmd))
				pmd_empty_section_gap(addr & PMD_MASK);
		}

		/*
		 * Then check if this vm ends on an odd section boundary.
		 * If so and the second section entry for this PMD is empty
		 * then we block the corresponding virtual address.
		 */
		addr += vm->size;
		if ((addr & ~PMD_MASK) == SECTION_SIZE) {
			pmd = pmd_off_k(addr) + 1;
			if (pmd_none(*pmd))
				pmd_empty_section_gap(addr);
		}

		/* no need to look at any vm entry until we hit the next PMD */
		next = (addr + PMD_SIZE - 1) & PMD_MASK;
	}
}

#else
#define fill_pmd_gaps() do { } while (0)
#endif

#if defined(CONFIG_PCI) && !defined(CONFIG_NEED_MACH_IO_H)
static void __init pci_reserve_io(void)
{
	struct static_vm *svm;

	svm = find_static_vm_vaddr((void *)PCI_IO_VIRT_BASE);
	if (svm)
		return;

	vm_reserve_area_early(PCI_IO_VIRT_BASE, SZ_2M, pci_reserve_io);
}
#else
#define pci_reserve_io() do { } while (0)
#endif

#ifdef CONFIG_DEBUG_LL
void __init debug_ll_io_init(void)
{
	struct map_desc map;

	debug_ll_addr(&map.pfn, &map.virtual);
	if (!map.pfn || !map.virtual)
		return;
	map.pfn = __phys_to_pfn(map.pfn);
	map.virtual &= PAGE_MASK;
	map.length = PAGE_SIZE;
	map.type = MT_DEVICE;
	iotable_init(&map, 1);
}
#endif

static void * __initdata vmalloc_min =
	(void *)(VMALLOC_END - (240 << 20) - VMALLOC_OFFSET);

/*
 * vmalloc=size forces the vmalloc area to be exactly 'size'
 * bytes. This can be used to increase (or decrease) the vmalloc
 * area - the default is 240m.
 */
static int __init early_vmalloc(char *arg)
{
	unsigned long vmalloc_reserve = memparse(arg, NULL);

	if (vmalloc_reserve < SZ_16M) {
		vmalloc_reserve = SZ_16M;
		pr_warn("vmalloc area too small, limiting to %luMB\n",
			vmalloc_reserve >> 20);
	}

	if (vmalloc_reserve > VMALLOC_END - (PAGE_OFFSET + SZ_32M)) {
		vmalloc_reserve = VMALLOC_END - (PAGE_OFFSET + SZ_32M);
		pr_warn("vmalloc area is too big, limiting to %luMB\n",
			vmalloc_reserve >> 20);
	}

	vmalloc_min = (void *)(VMALLOC_END - vmalloc_reserve);
	return 0;
}
early_param("vmalloc", early_vmalloc);
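
/*
 * Example: booting with "vmalloc=512M" reserves 512MB of virtual space
 * ending at VMALLOC_END for vmalloc/ioremap, reducing the amount of RAM
 * that can be mapped as lowmem accordingly (subject to the 16MB minimum
 * and the 32MB-above-PAGE_OFFSET clamp applied above).
 */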

phys_addr_t arm_lowmem_limit __initdata = 0;

void __init sanity_check_meminfo(void)
{
	phys_addr_t memblock_limit = 0;
	int highmem = 0;
	phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1;
	struct memblock_region *reg;

	for_each_memblock(memory, reg) {
		phys_addr_t block_start = reg->base;
		phys_addr_t block_end = reg->base + reg->size;
		phys_addr_t size_limit = reg->size;

		if (reg->base >= vmalloc_limit)
			highmem = 1;
		else
			size_limit = vmalloc_limit - reg->base;

		if (!IS_ENABLED(CONFIG_HIGHMEM) || cache_is_vipt_aliasing()) {

			if (highmem) {
				pr_notice("Ignoring RAM at %pa-%pa (!CONFIG_HIGHMEM)\n",
					  &block_start, &block_end);
				memblock_remove(reg->base, reg->size);
				continue;
			}

			if (reg->size > size_limit) {
				phys_addr_t overlap_size = reg->size - size_limit;

				pr_notice("Truncating RAM at %pa-%pa to -%pa\n",
					  &block_start, &block_end, &vmalloc_limit);
				memblock_remove(vmalloc_limit, overlap_size);
				block_end = vmalloc_limit;
			}
		}

		if (!highmem) {
			if (block_end > arm_lowmem_limit) {
				if (reg->size > size_limit)
					arm_lowmem_limit = vmalloc_limit;
				else
					arm_lowmem_limit = block_end;
			}

			/*
			 * Find the first non-pmd-aligned page, and point
			 * memblock_limit at it. This relies on rounding the
			 * limit down to be pmd-aligned, which happens at the
			 * end of this function.
			 *
			 * With this algorithm, the start or end of almost any
			 * bank can be non-pmd-aligned. The only exception is
			 * that the start of bank 0 must be section-
			 * aligned, since otherwise memory would need to be
			 * allocated when mapping the start of bank 0, which
			 * occurs before any free memory is mapped.
			 */
			if (!memblock_limit) {
				if (!IS_ALIGNED(block_start, PMD_SIZE))
					memblock_limit = block_start;
				else if (!IS_ALIGNED(block_end, PMD_SIZE))
					memblock_limit = arm_lowmem_limit;
			}

		}
	}

	high_memory = __va(arm_lowmem_limit - 1) + 1;

	/*
	 * Round the memblock limit down to a pmd size.  This
	 * helps to ensure that we will allocate memory from the
	 * last full pmd, which should be mapped.
	 */
	if (memblock_limit)
		memblock_limit = round_down(memblock_limit, PMD_SIZE);
	if (!memblock_limit)
		memblock_limit = arm_lowmem_limit;

	memblock_set_current_limit(memblock_limit);
}

static inline void prepare_page_table(void)
{
	unsigned long addr;
	phys_addr_t end;

	/*
	 * Clear out all the mappings below the kernel image.
	 */
	for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));

#ifdef CONFIG_XIP_KERNEL
	/* The XIP kernel is mapped in the module area -- skip over it */
	addr = ((unsigned long)_etext + PMD_SIZE - 1) & PMD_MASK;
#endif
	for ( ; addr < PAGE_OFFSET; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));

	/*
	 * Find the end of the first block of lowmem.
	 */
	end = memblock.memory.regions[0].base + memblock.memory.regions[0].size;
	if (end >= arm_lowmem_limit)
		end = arm_lowmem_limit;

	/*
	 * Clear out all the kernel space mappings, except for the first
	 * memory bank, up to the vmalloc region.
	 */
	for (addr = __phys_to_virt(end);
	     addr < VMALLOC_START; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));
}

#ifdef CONFIG_ARM_LPAE
/* the first page is reserved for pgd */
#define SWAPPER_PG_DIR_SIZE	(PAGE_SIZE + \
				 PTRS_PER_PGD * PTRS_PER_PMD * sizeof(pmd_t))
#else
#define SWAPPER_PG_DIR_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
#endif
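
/*
 * Rough sizing (sketch, assuming the usual PTRS_PER_* values): the classic
 * 2-level format has 2048 pgd entries of 8 bytes each, i.e. the familiar
 * 16K swapper_pg_dir; with LPAE it is one page for the 4-entry top level
 * plus 4 * 512 pmd entries of 8 bytes, 20K in total.
 */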

/*
 * Reserve the special regions of memory
 */
void __init arm_mm_memblock_reserve(void)
{
	/*
	 * Reserve the page tables.  These are already in use,
	 * and can only be in node 0.
	 */
	memblock_reserve(__pa(swapper_pg_dir), SWAPPER_PG_DIR_SIZE);

#ifdef CONFIG_SA1111
	/*
	 * Because of the SA1111 DMA bug, we want to preserve our
	 * precious DMA-able memory...
	 */
	memblock_reserve(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET);
#endif
}

/*
 * Set up the device mappings.  Since we clear out the page tables for all
 * mappings above VMALLOC_START, we will remove any debug device mappings.
 * This means you have to be careful how you debug this function, or any
 * called function: you can't use any function or debugging method which
 * may touch any device, otherwise the kernel _will_ crash.
 */
static void __init devicemaps_init(const struct machine_desc *mdesc)
{
	struct map_desc map;
	unsigned long addr;
	void *vectors;

	/*
	 * Allocate the vector page early.
	 */
	vectors = early_alloc(PAGE_SIZE * 2);

	early_trap_init(vectors);

	for (addr = VMALLOC_START; addr; addr += PMD_SIZE)
		pmd_clear(pmd_off_k(addr));

	/*
	 * Map the kernel if it is XIP.
	 * It is always first in the modulearea.
	 */
#ifdef CONFIG_XIP_KERNEL
	map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
	map.virtual = MODULES_VADDR;
	map.length = ((unsigned long)_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK;
	map.type = MT_ROM;
	create_mapping(&map);
#endif

	/*
	 * Map the cache flushing regions.
	 */
#ifdef FLUSH_BASE
	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS);
	map.virtual = FLUSH_BASE;
	map.length = SZ_1M;
	map.type = MT_CACHECLEAN;
	create_mapping(&map);
#endif
#ifdef FLUSH_BASE_MINICACHE
	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M);
	map.virtual = FLUSH_BASE_MINICACHE;
	map.length = SZ_1M;
	map.type = MT_MINICLEAN;
	create_mapping(&map);
#endif

	/*
	 * Create a mapping for the machine vectors at the high-vectors
	 * location (0xffff0000).  If we aren't using high-vectors, also
	 * create a mapping at the low-vectors virtual address.
	 */
	map.pfn = __phys_to_pfn(virt_to_phys(vectors));
	map.virtual = 0xffff0000;
	map.length = PAGE_SIZE;
#ifdef CONFIG_KUSER_HELPERS
	map.type = MT_HIGH_VECTORS;
#else
	map.type = MT_LOW_VECTORS;
#endif
	create_mapping(&map);

	if (!vectors_high()) {
		map.virtual = 0;
		map.length = PAGE_SIZE * 2;
		map.type = MT_LOW_VECTORS;
		create_mapping(&map);
	}

	/* Now create a kernel read-only mapping */
	map.pfn += 1;
	map.virtual = 0xffff0000 + PAGE_SIZE;
	map.length = PAGE_SIZE;
	map.type = MT_LOW_VECTORS;
	create_mapping(&map);

	/*
	 * Ask the machine support to map in the statically mapped devices.
	 */
	if (mdesc->map_io)
		mdesc->map_io();
	else
		debug_ll_io_init();
	fill_pmd_gaps();

	/* Reserve fixed i/o space in VMALLOC region */
	pci_reserve_io();

	/*
	 * Finally flush the caches and tlb to ensure that we're in a
	 * consistent state wrt the writebuffer.  This also ensures that
	 * any write-allocated cache lines in the vector page are written
	 * back.  After this point, we can start to touch devices again.
	 */
	local_flush_tlb_all();
	flush_cache_all();
}

static void __init kmap_init(void)
{
#ifdef CONFIG_HIGHMEM
	pkmap_page_table = early_pte_alloc(pmd_off_k(PKMAP_BASE),
		PKMAP_BASE, _PAGE_KERNEL_TABLE);
#endif

	early_pte_alloc(pmd_off_k(FIXADDR_START), FIXADDR_START,
			_PAGE_KERNEL_TABLE);
}

static void __init map_lowmem(void)
{
	struct memblock_region *reg;
	phys_addr_t kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
	phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);

	/* Map all the lowmem memory banks. */
	for_each_memblock(memory, reg) {
		phys_addr_t start = reg->base;
		phys_addr_t end = start + reg->size;
		struct map_desc map;

		if (end > arm_lowmem_limit)
			end = arm_lowmem_limit;
		if (start >= end)
			break;

		if (end < kernel_x_start) {
			map.pfn = __phys_to_pfn(start);
			map.virtual = __phys_to_virt(start);
			map.length = end - start;
			map.type = MT_MEMORY_RWX;

			create_mapping(&map);
		} else if (start >= kernel_x_end) {
			map.pfn = __phys_to_pfn(start);
			map.virtual = __phys_to_virt(start);
			map.length = end - start;
			map.type = MT_MEMORY_RW;

			create_mapping(&map);
		} else {
			/* This better cover the entire kernel */
			if (start < kernel_x_start) {
				map.pfn = __phys_to_pfn(start);
				map.virtual = __phys_to_virt(start);
				map.length = kernel_x_start - start;
				map.type = MT_MEMORY_RW;

				create_mapping(&map);
			}

			map.pfn = __phys_to_pfn(kernel_x_start);
			map.virtual = __phys_to_virt(kernel_x_start);
			map.length = kernel_x_end - kernel_x_start;
			map.type = MT_MEMORY_RWX;

			create_mapping(&map);

			if (kernel_x_end < end) {
				map.pfn = __phys_to_pfn(kernel_x_end);
				map.virtual = __phys_to_virt(kernel_x_end);
				map.length = end - kernel_x_end;
				map.type = MT_MEMORY_RW;

				create_mapping(&map);
			}
		}
	}
}

#ifdef CONFIG_ARM_LPAE
/*
 * early_paging_init() recreates boot time page table setup, allowing machines
 * to switch over to a high (>4G) address space on LPAE systems
 */
void __init early_paging_init(const struct machine_desc *mdesc,
			      struct proc_info_list *procinfo)
{
	pmdval_t pmdprot = procinfo->__cpu_mm_mmu_flags;
	unsigned long map_start, map_end;
	pgd_t *pgd0, *pgdk;
	pud_t *pud0, *pudk, *pud_start;
	pmd_t *pmd0, *pmdk;
	phys_addr_t phys;
	int i;

	if (!(mdesc->init_meminfo))
		return;

	/* remap kernel code and data */
	map_start = init_mm.start_code & PMD_MASK;
	map_end   = ALIGN(init_mm.brk, PMD_SIZE);

	/* get a handle on things... */
	pgd0 = pgd_offset_k(0);
	pud_start = pud0 = pud_offset(pgd0, 0);
	pmd0 = pmd_offset(pud0, 0);

	pgdk = pgd_offset_k(map_start);
	pudk = pud_offset(pgdk, map_start);
	pmdk = pmd_offset(pudk, map_start);

	mdesc->init_meminfo();

	/* Run the patch stub to update the constants */
	fixup_pv_table(&__pv_table_begin,
		(&__pv_table_end - &__pv_table_begin) << 2);

	/*
	 * Cache cleaning operations for self-modifying code.
	 * We should clean the entries by MVA but running a
	 * for loop over every pv_table entry pointer would
	 * just complicate the code.
	 */
	flush_cache_louis();
	dsb(ishst);
	isb();

	/*
	 * FIXME: This code is not architecturally compliant: we modify
	 * the mappings in-place, indeed while they are in use by this
	 * very same code.  This may lead to unpredictable behaviour of
	 * the CPU.
	 *
	 * Even modifying the mappings in a separate page table does
	 * not resolve this.
	 *
	 * The architecture strongly recommends that when a mapping is
	 * changed, that it is changed by first going via an invalid
	 * mapping and back to the new mapping.  This is to ensure that
	 * no TLB conflicts (caused by the TLB having more than one TLB
	 * entry match a translation) can occur.  However, doing that
	 * here will result in unmapping the code we are running.
	 */
	pr_warn("WARNING: unsafe modification of in-place page tables - tainting kernel\n");
	add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);

	/*
	 * Remap level 1 table.  This changes the physical addresses
	 * used to refer to the level 2 page tables to the high
	 * physical address alias, leaving everything else the same.
	 */
	for (i = 0; i < PTRS_PER_PGD; pud0++, i++) {
		set_pud(pud0,
			__pud(__pa(pmd0) | PMD_TYPE_TABLE | L_PGD_SWAPPER));
		pmd0 += PTRS_PER_PMD;
	}

	/*
	 * Remap the level 2 table, pointing the mappings at the high
	 * physical address alias of these pages.
	 */
	phys = __pa(map_start);
	do {
		*pmdk++ = __pmd(phys | pmdprot);
		phys += PMD_SIZE;
	} while (phys < map_end);

	/*
	 * Ensure that the above updates are flushed out of the cache.
	 * This is not strictly correct; on a system where the caches
	 * are coherent with each other, but the MMU page table walks
	 * may not be coherent, flush_cache_all() may be a no-op, and
	 * this will fail.
	 */
	flush_cache_all();

	/*
	 * Re-write the TTBR values to point them at the high physical
	 * alias of the page tables.  We expect __va() will work on
	 * cpu_get_pgd(), which returns the value of TTBR0.
	 */
	cpu_switch_mm(pgd0, &init_mm);
	cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET);

	/* Finally flush any stale TLB values. */
	local_flush_bp_all();
	local_flush_tlb_all();
}

#else

void __init early_paging_init(const struct machine_desc *mdesc,
			      struct proc_info_list *procinfo)
{
	if (mdesc->init_meminfo)
		mdesc->init_meminfo();
}

#endif

/*
 * paging_init() sets up the page tables, initialises the zone memory
 * maps, and sets up the zero page, bad page and bad page tables.
 */
void __init paging_init(const struct machine_desc *mdesc)
{
	void *zero_page;

	build_mem_type_table();
	prepare_page_table();
	map_lowmem();
	dma_contiguous_remap();
	devicemaps_init(mdesc);
	kmap_init();
	tcm_init();

	top_pmd = pmd_off_k(0xffff0000);

	/* allocate the zero page. */
	zero_page = early_alloc(PAGE_SIZE);

	bootmem_init();

	empty_zero_page = virt_to_page(zero_page);
	__flush_dcache_page(NULL, empty_zero_page);
}