1#include <linux/seq_file.h>
2#include <linux/debugfs.h>
3#include <linux/module.h>
4#include <linux/mm.h>
5#include <asm/sections.h>
6#include <asm/pgtable.h>
7
/*
 * Exclusive upper bound for the addresses visited by the walk_*_level()
 * functions. Computed once in pt_dump_init() from the kernel ASCE.
 */
static unsigned long max_addr;
9
/*
 * A named boundary in the kernel address space. note_page() prints the
 * name as a "---[ name ]---" header once the walk reaches start_address.
 */
struct addr_marker {
	unsigned long start_address;	/* first address belonging to the area */
	const char *name;		/* header text; NULL in the table sentinel */
};
14
/*
 * Indices into address_markers[]. pt_dump_init() uses them to fill in the
 * start addresses that are only known at run time. note_page() steps
 * through the table in index order, so the entries are expected to be
 * sorted by ascending start address.
 */
enum address_markers_idx {
	IDENTITY_NR = 0,
	KERNEL_START_NR,
	KERNEL_END_NR,
	VMEMMAP_NR,
	VMALLOC_NR,
	MODULES_NR,
};
23
/*
 * Table of area markers, consumed in order by note_page().
 *
 * The kernel image boundaries come from the linker symbols _stext and
 * _end. The vmemmap, vmalloc and modules entries start out as 0 and are
 * filled in by pt_dump_init(). The trailing { -1, NULL } entry is a
 * sentinel: -1 converts to the largest unsigned long, so the
 * "current_address >= marker[1].start_address" test in note_page() stays
 * false for any smaller address once the last real marker is active.
 */
static struct addr_marker address_markers[] = {
	[IDENTITY_NR]	  = {0, "Identity Mapping"},
	[KERNEL_START_NR] = {(unsigned long)&_stext, "Kernel Image Start"},
	[KERNEL_END_NR]	  = {(unsigned long)&_end, "Kernel Image End"},
	[VMEMMAP_NR]	  = {0, "vmemmap Area"},
	[VMALLOC_NR]	  = {0, "vmalloc Area"},
	[MODULES_NR]	  = {0, "Modules Area"},
	{ -1, NULL }
};
33
/*
 * State carried across note_page() calls while one dump is produced.
 * walk_pgd_level() zeroes it before the walk starts.
 */
struct pg_state {
	int level;			/* level of the current series; 0 = nothing noted yet */
	unsigned int current_prot;	/* protection bits of the current series */
	unsigned long start_address;	/* where the current series began */
	unsigned long current_address;	/* address of the entry being visited */
	const struct addr_marker *marker; /* marker of the area currently walked */
};
41
42static void print_prot(struct seq_file *m, unsigned int pr, int level)
43{
44	static const char * const level_name[] =
45		{ "ASCE", "PGD", "PUD", "PMD", "PTE" };
46
47	seq_printf(m, "%s ", level_name[level]);
48	if (pr & _PAGE_INVALID) {
49		seq_printf(m, "I\n");
50		return;
51	}
52	seq_printf(m, "%s", pr & _PAGE_PROTECT ? "RO " : "RW ");
53	seq_putc(m, '\n');
54}
55
56static void note_page(struct seq_file *m, struct pg_state *st,
57		     unsigned int new_prot, int level)
58{
59	static const char units[] = "KMGTPE";
60	int width = sizeof(unsigned long) * 2;
61	const char *unit = units;
62	unsigned int prot, cur;
63	unsigned long delta;
64
65	/*
66	 * If we have a "break" in the series, we need to flush the state
67	 * that we have now. "break" is either changing perms, levels or
68	 * address space marker.
69	 */
70	prot = new_prot;
71	cur = st->current_prot;
72
73	if (!st->level) {
74		/* First entry */
75		st->current_prot = new_prot;
76		st->level = level;
77		st->marker = address_markers;
78		seq_printf(m, "---[ %s ]---\n", st->marker->name);
79	} else if (prot != cur || level != st->level ||
80		   st->current_address >= st->marker[1].start_address) {
81		/* Print the actual finished series */
82		seq_printf(m, "0x%0*lx-0x%0*lx",
83			   width, st->start_address,
84			   width, st->current_address);
85		delta = (st->current_address - st->start_address) >> 10;
86		while (!(delta & 0x3ff) && unit[1]) {
87			delta >>= 10;
88			unit++;
89		}
90		seq_printf(m, "%9lu%c ", delta, *unit);
91		print_prot(m, st->current_prot, st->level);
92		if (st->current_address >= st->marker[1].start_address) {
93			st->marker++;
94			seq_printf(m, "---[ %s ]---\n", st->marker->name);
95		}
96		st->start_address = st->current_address;
97		st->current_prot = new_prot;
98		st->level = level;
99	}
100}
101
102/*
103 * The actual page table walker functions. In order to keep the
104 * implementation of print_prot() short, we only check and pass
105 * _PAGE_INVALID and _PAGE_PROTECT flags to note_page() if a region,
106 * segment or page table entry is invalid or read-only.
107 * After all it's just a hint that the current level being walked
108 * contains an invalid or read-only entry.
109 */
110static void walk_pte_level(struct seq_file *m, struct pg_state *st,
111			   pmd_t *pmd, unsigned long addr)
112{
113	unsigned int prot;
114	pte_t *pte;
115	int i;
116
117	for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) {
118		st->current_address = addr;
119		pte = pte_offset_kernel(pmd, addr);
120		prot = pte_val(*pte) & (_PAGE_PROTECT | _PAGE_INVALID);
121		note_page(m, st, prot, 4);
122		addr += PAGE_SIZE;
123	}
124}
125
126static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
127			   pud_t *pud, unsigned long addr)
128{
129	unsigned int prot;
130	pmd_t *pmd;
131	int i;
132
133	for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++) {
134		st->current_address = addr;
135		pmd = pmd_offset(pud, addr);
136		if (!pmd_none(*pmd)) {
137			if (pmd_large(*pmd)) {
138				prot = pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT;
139				note_page(m, st, prot, 3);
140			} else
141				walk_pte_level(m, st, pmd, addr);
142		} else
143			note_page(m, st, _PAGE_INVALID, 3);
144		addr += PMD_SIZE;
145	}
146}
147
148static void walk_pud_level(struct seq_file *m, struct pg_state *st,
149			   pgd_t *pgd, unsigned long addr)
150{
151	unsigned int prot;
152	pud_t *pud;
153	int i;
154
155	for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) {
156		st->current_address = addr;
157		pud = pud_offset(pgd, addr);
158		if (!pud_none(*pud))
159			if (pud_large(*pud)) {
160				prot = pud_val(*pud) & _REGION3_ENTRY_RO;
161				note_page(m, st, prot, 2);
162			} else
163				walk_pmd_level(m, st, pud, addr);
164		else
165			note_page(m, st, _PAGE_INVALID, 2);
166		addr += PUD_SIZE;
167	}
168}
169
170static void walk_pgd_level(struct seq_file *m)
171{
172	unsigned long addr = 0;
173	struct pg_state st;
174	pgd_t *pgd;
175	int i;
176
177	memset(&st, 0, sizeof(st));
178	for (i = 0; i < PTRS_PER_PGD && addr < max_addr; i++) {
179		st.current_address = addr;
180		pgd = pgd_offset_k(addr);
181		if (!pgd_none(*pgd))
182			walk_pud_level(m, &st, pgd, addr);
183		else
184			note_page(m, &st, _PAGE_INVALID, 1);
185		addr += PGDIR_SIZE;
186	}
187	/* Flush out the last page */
188	st.current_address = max_addr;
189	note_page(m, &st, 0, 0);
190}
191
/*
 * seq_file show callback: produce the complete page table dump in @m.
 * @v is not used. Always returns 0.
 */
static int ptdump_show(struct seq_file *m, void *v)
{
	walk_pgd_level(m);
	return 0;
}
197
/*
 * open() handler of the debugfs file: set up a single-record seq_file
 * that uses ptdump_show() as its show callback, without private data.
 * Returns whatever single_open() returns.
 */
static int ptdump_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, ptdump_show, NULL);
}
202
/*
 * File operations of the "kernel_page_tables" debugfs file. Only open is
 * specific to this file; read, llseek and release are the generic
 * seq_file helpers matching single_open().
 */
static const struct file_operations ptdump_fops = {
	.open		= ptdump_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
209
210static int pt_dump_init(void)
211{
212	/*
213	 * Figure out the maximum virtual address being accessible with the
214	 * kernel ASCE. We need this to keep the page table walker functions
215	 * from accessing non-existent entries.
216	 */
217	max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
218	max_addr = 1UL << (max_addr * 11 + 31);
219	address_markers[MODULES_NR].start_address = MODULES_VADDR;
220	address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
221	address_markers[VMALLOC_NR].start_address = VMALLOC_START;
222	debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
223	return 0;
224}
225device_initcall(pt_dump_init);
226